In [14]:
#!pip install bertopic
#!pip install sentence-transformers[cpu]
#!pip uninstall torch
#!pip install torch==1.9.0+cpu torchvision==0.10.0+cpu torchaudio==0.9.0 -f https://download.pytorch.org/whl/torch_stable.html
Found existing installation: torch 1.13.1
Uninstalling torch-1.13.1:
  Would remove:
    /opt/conda/bin/convert-caffe2-to-onnx
    /opt/conda/bin/convert-onnx-to-caffe2
    /opt/conda/bin/torchrun
    /opt/conda/lib/python3.7/site-packages/functorch/*
    /opt/conda/lib/python3.7/site-packages/torch-1.13.1.dist-info/*
    /opt/conda/lib/python3.7/site-packages/torch/*
    /opt/conda/lib/python3.7/site-packages/torchgen/*
Proceed (Y/n)? ^C
ERROR: Operation cancelled by user
Looking in links: https://download.pytorch.org/whl/torch_stable.html
Collecting torch==1.9.0+cpu
  Downloading https://download.pytorch.org/whl/cpu/torch-1.9.0%2Bcpu-cp37-cp37m-linux_x86_64.whl (175.5 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 175.5/175.5 MB 5.4 MB/s eta 0:00:0000:0100:01
Collecting torchvision==0.10.0+cpu
  Downloading https://download.pytorch.org/whl/cpu/torchvision-0.10.0%2Bcpu-cp37-cp37m-linux_x86_64.whl (15.7 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 15.7/15.7 MB 52.8 MB/s eta 0:00:0000:0100:01
Collecting torchaudio==0.9.0
  Downloading torchaudio-0.9.0-cp37-cp37m-manylinux1_x86_64.whl (1.9 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.9/1.9 MB 22.1 MB/s eta 0:00:0000:010:01
Requirement already satisfied: typing-extensions in /opt/conda/lib/python3.7/site-packages (from torch==1.9.0+cpu) (4.4.0)
Requirement already satisfied: numpy in /opt/conda/lib/python3.7/site-packages (from torchvision==0.10.0+cpu) (1.21.6)
Requirement already satisfied: pillow>=5.3.0 in /opt/conda/lib/python3.7/site-packages (from torchvision==0.10.0+cpu) (9.5.0)
Installing collected packages: torch, torchvision, torchaudio
  Attempting uninstall: torch
    Found existing installation: torch 1.13.1
    Uninstalling torch-1.13.1:
      Successfully uninstalled torch-1.13.1
  Attempting uninstall: torchvision
    Found existing installation: torchvision 0.14.1
    Uninstalling torchvision-0.14.1:
      Successfully uninstalled torchvision-0.14.1
Successfully installed torch-1.9.0+cpu torchaudio-0.9.0 torchvision-0.10.0+cpu
In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from google.cloud import storage
import nltk as nltk
import ast

import os
os.environ["CUDA_VISIBLE_DEVICES"] = ""
os.environ["TOKENIZERS_PARALLELISM"] = "false"

from bertopic import BERTopic
2023-08-15 22:57:20.111246: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-08-15 22:57:21.175694: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:/usr/local/nccl2/lib:/usr/local/cuda/extras/CUPTI/lib64
2023-08-15 22:57:21.175854: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:/usr/local/nccl2/lib:/usr/local/cuda/extras/CUPTI/lib64
2023-08-15 22:57:21.175867: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.
2023-08-15 22:57:23.387595: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:/usr/local/nccl2/lib:/usr/local/cuda/extras/CUPTI/lib64
2023-08-15 22:57:23.387651: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)
2023-08-15 22:57:23.387677: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (instance-20230809-105707): /proc/driver/nvidia/version does not exist
In [2]:
import multiprocessing
from pandarallel import pandarallel
num_processors = multiprocessing.cpu_count()
print(f'Available CPUs: {num_processors}')

pandarallel.initialize(progress_bar=False, nb_workers=num_processors-1, use_memory_fs=False)
Available CPUs: 96
INFO: Pandarallel will run on 95 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.
In [3]:
%%time
df = pd.read_parquet('gs://nlp_final_ss/all_sents.parquet')
df.shape
CPU times: user 4.27 s, sys: 2.57 s, total: 6.85 s
Wall time: 18.7 s
Out[3]:
(183130, 13)
In [4]:
df.head()
Out[4]:
date clean_lem_title clean_lem_text title_length text_length neg neu pos compound sentiment sentiments_SVM Sentiments_LR Sentiments_NB
0 2020-02-27 child with autism saw their learn social skill... child with autism saw their learn social skill... 17 985 0.039 0.791 0.170 0.9992 Positive 0 0 0
1 2021-11-24 college technology launch ai retail lab the co... college technology launch ai retail lab the co... 8 565 0.021 0.872 0.107 0.9874 Positive 0 0 0
2 2022-06-09 child actor claude jarman jr look back movie c... child actor claude jarman jr look back movie c... 14 1720 0.073 0.724 0.204 0.9998 Positive 1 1 0
3 2020-04-17 cr bard inc ha return since smartrend recommen... cr bard inc ha return since smartrend recommen... 11 282 0.014 0.841 0.145 0.9871 Positive 0 0 0
4 2021-02-25 multicoreware inc becomes cevas trust partner ... multicoreware inc becomes cevas trust partner ... 12 588 0.021 0.809 0.170 0.9979 Positive 0 0 0
In [31]:
df['sentiment'].value_counts()
Out[31]:
Positive    172309
Negative     10227
Neutral        594
Name: sentiment, dtype: int64
In [4]:
%%time
df['text_tokens'] = df['clean_lem_text'].parallel_apply(nltk.word_tokenize)
CPU times: user 18.8 s, sys: 18.9 s, total: 37.7 s
Wall time: 47.7 s
In [32]:
df.head()
Out[32]:
date clean_lem_title clean_lem_text title_length text_length neg neu pos compound sentiment sentiments_SVM Sentiments_LR Sentiments_NB text_tokens
0 2020-02-27 child with autism saw their learn social skill... child with autism saw their learn social skill... 17 985 0.039 0.791 0.170 0.9992 Positive 0 0 0 [child, with, autism, saw, their, learn, socia...
1 2021-11-24 college technology launch ai retail lab the co... college technology launch ai retail lab the co... 8 565 0.021 0.872 0.107 0.9874 Positive 0 0 0 [college, technology, launch, ai, retail, lab,...
2 2022-06-09 child actor claude jarman jr look back movie c... child actor claude jarman jr look back movie c... 14 1720 0.073 0.724 0.204 0.9998 Positive 1 1 0 [child, actor, claude, jarman, jr, look, back,...
3 2020-04-17 cr bard inc ha return since smartrend recommen... cr bard inc ha return since smartrend recommen... 11 282 0.014 0.841 0.145 0.9871 Positive 0 0 0 [cr, bard, inc, ha, return, since, smartrend, ...
4 2021-02-25 multicoreware inc becomes cevas trust partner ... multicoreware inc becomes cevas trust partner ... 12 588 0.021 0.809 0.170 0.9979 Positive 0 0 0 [multicoreware, inc, becomes, cevas, trust, pa...
In [34]:
df.dtypes
Out[34]:
date               datetime64[ns]
clean_lem_title            object
clean_lem_text             object
title_length                int64
text_length                 int64
neg                       float64
neu                       float64
pos                       float64
compound                  float64
sentiment                  object
sentiments_SVM              int64
Sentiments_LR               int64
Sentiments_NB               int64
text_tokens                object
dtype: object

Bert Topics on Positive News Articles¶

In [5]:
positive_sentiments = df[df['sentiment'] == 'Positive']
positive_sentiments.shape
Out[5]:
(172309, 14)
In [7]:
positive_sentiments.head()
Out[7]:
date clean_lem_title clean_lem_text title_length text_length neg neu pos compound sentiment sentiments_SVM Sentiments_LR Sentiments_NB text_tokens
0 2020-02-27 child with autism saw their learn social skill... child with autism saw their learn social skill... 17 985 0.039 0.791 0.170 0.9992 Positive 0 0 0 [child, with, autism, saw, their, learn, socia...
1 2021-11-24 college technology launch ai retail lab the co... college technology launch ai retail lab the co... 8 565 0.021 0.872 0.107 0.9874 Positive 0 0 0 [college, technology, launch, ai, retail, lab,...
2 2022-06-09 child actor claude jarman jr look back movie c... child actor claude jarman jr look back movie c... 14 1720 0.073 0.724 0.204 0.9998 Positive 1 1 0 [child, actor, claude, jarman, jr, look, back,...
3 2020-04-17 cr bard inc ha return since smartrend recommen... cr bard inc ha return since smartrend recommen... 11 282 0.014 0.841 0.145 0.9871 Positive 0 0 0 [cr, bard, inc, ha, return, since, smartrend, ...
4 2021-02-25 multicoreware inc becomes cevas trust partner ... multicoreware inc becomes cevas trust partner ... 12 588 0.021 0.809 0.170 0.9979 Positive 0 0 0 [multicoreware, inc, becomes, cevas, trust, pa...
In [35]:
positive_sentiments.dtypes
Out[35]:
date               datetime64[ns]
clean_lem_title            object
clean_lem_text             object
title_length                int64
text_length                 int64
neg                       float64
neu                       float64
pos                       float64
compound                  float64
sentiment                  object
sentiments_SVM              int64
Sentiments_LR               int64
Sentiments_NB               int64
text_tokens                object
dtype: object
In [ ]:
pd.set_option('display.max_colwidth', 500) #500
positive_sentiments['text_tokens'].head(1)
In [6]:
positive_sentiments['text_tokens_string'] = positive_sentiments['text_tokens'].apply(' '.join)
In [7]:
%%time
mod_BERT_pos = BERTopic(calculate_probabilities=True, verbose=True, min_topic_size=50)
topics_pos, probabilities_pos = mod_BERT_pos.fit_transform(positive_sentiments['text_tokens_string'].tolist())
Batches:   0%|          | 0/5385 [00:00<?, ?it/s]
2023-08-15 23:35:50,744 - BERTopic - Transformed documents to Embeddings
2023-08-15 23:39:36,194 - BERTopic - Reduced dimensionality
2023-08-16 02:23:44,662 - BERTopic - Clustered reduced embeddings
CPU times: user 21h 53min 28s, sys: 4h 7min 37s, total: 1d 2h 1min 6s
Wall time: 3h 29min 56s
In [8]:
mod_BERT_pos.get_topic_info().head(20)
Out[8]:
Topic Count Name Representation Representative_Docs
0 -1 63170 -1_ai_gray_the_data [ai, gray, the, data, use, say, group, technol... [can ensure machine learn model trustworthy ho...
1 0 2516 0_cision_overviewview_overview_resource [cision, overviewview, overview, resource, all... [sberbankai alliance international junior ai c...
2 1 2229 1_market_analysis_growth_player [market, analysis, growth, player, global, for... [global artificial intelligence product market...
3 2 2136 2_newswires_presswire_ein_dakota [newswires, presswire, ein, dakota, virginia, ... [machine learn tax enforcement about about ein...
4 3 1630 3_market_artificial_intelligence_analysis [market, artificial, intelligence, analysis, g... [global artificial intelligence service market...
5 4 1336 4_npr_radio_schedule_donate [npr, radio, schedule, donate, wlrn, classical... [nprs planet money creates episode use artific...
6 5 1296 5_chatgpt_gpt4_openai_chatbot [chatgpt, gpt4, openai, chatbot, text, write, ... [chatgpt everything need know aipowered chatbo...
7 6 1287 6_ago_hour_bestreviews_nexstar [ago, hour, bestreviews, nexstar, weather, sto... [chatgptmaker openai sign deal ap license news...
8 7 1238 7_human_extinction_humanity_could [human, extinction, humanity, could, nuclear, ... [jos hernndezorallo ai expert the scale human ...
9 8 1092 8_student_teacher_school_education [student, teacher, school, education, classroo... [teacher fear chatgpt will make cheat easy tha...
10 9 1072 9_wfmztv_berk_lehigh_valley [wfmztv, berk, lehigh, valley, traffic, allent... [tackle ai select finalist fast company world ...
11 10 1024 10_venturebeat_vb_follow_homepage [venturebeat, vb, follow, homepage, gamesbeat,... [report consumer want use ai job venturebeat s...
12 11 972 11_coronavirus_virus_covid19_outbreak [coronavirus, virus, covid19, outbreak, vaccin... [can ai flag disease outbreak faster human not...
13 12 926 12_hunt_connectstoriestech_joblegalsign_discus... [hunt, connectstoriestech, joblegalsign, discu... [chatgpt data science use chatgpt become maste...
14 13 819 13_bard_google_chatbot_pichai [bard, google, chatbot, pichai, chatgpt, answe... [google bard cheat sheet what bard access skip...
15 14 779 14_nvidia_nvidias_dgx_gpus [nvidia, nvidias, dgx, gpus, huang, gpu, chip,... [nvidia smash performance record on ai inferen...
16 15 742 15_voice_speech_audio_liveperson [voice, speech, audio, liveperson, transcripti... [liveperson acquires voicebase tenfold power s...
17 16 656 16_china_chinese_beijing_xi [china, chinese, beijing, xi, military, shangh... [chinese firm rush milk chatgpt frenzy rival a...
18 17 647 17_africa_nigeria_ghana_african [africa, nigeria, ghana, african, nigerian, ar... [nigeria establish artificial intelligence rob...
19 18 647 18_brandvoice_pay_forbes_rich [brandvoice, pay, forbes, rich, program, best,... [eight way company can leverage ai in content ...
In [9]:
positive_topic_info_df = pd.DataFrame(mod_BERT_pos.get_topic_info())
print(positive_topic_info_df.shape)
positive_topic_info_df.head()
(663, 5)
Out[9]:
Topic Count Name Representation Representative_Docs
0 -1 63170 -1_ai_gray_the_data [ai, gray, the, data, use, say, group, technol... [can ensure machine learn model trustworthy ho...
1 0 2516 0_cision_overviewview_overview_resource [cision, overviewview, overview, resource, all... [sberbankai alliance international junior ai c...
2 1 2229 1_market_analysis_growth_player [market, analysis, growth, player, global, for... [global artificial intelligence product market...
3 2 2136 2_newswires_presswire_ein_dakota [newswires, presswire, ein, dakota, virginia, ... [machine learn tax enforcement about about ein...
4 3 1630 3_market_artificial_intelligence_analysis [market, artificial, intelligence, analysis, g... [global artificial intelligence service market...
In [10]:
positive_topic_info_df.to_parquet('positive_topic_info_df.parquet')

bucket_name = 'nlp_final_ss'
file_path = 'positive_topic_info_df.parquet'

storage_client = storage.Client()
bucket = storage_client.get_bucket(bucket_name)
blob = bucket.blob(file_path)
blob.upload_from_filename(file_path)
In [12]:
positive_sentiments['Bert_topics']=mod_BERT_pos.topics_
positive_sentiments['Bert_topics_words'] = positive_sentiments['Bert_topics'].apply(lambda x: mod_BERT_pos.get_topic(x))
In [13]:
positive_sentiments
Out[13]:
date clean_lem_title clean_lem_text title_length text_length neg neu pos compound sentiment sentiments_SVM Sentiments_LR Sentiments_NB text_tokens text_tokens_string Bert_topics Bert_topics_words
0 2020-02-27 child with autism saw their learn social skill... child with autism saw their learn social skill... 17 985 0.039 0.791 0.170 0.9992 Positive 0 0 0 [child, with, autism, saw, their, learn, socia... child with autism saw their learn social skill... -1 [(ai, 0.0015218465570483101), (gray, 0.0014727...
1 2021-11-24 college technology launch ai retail lab the co... college technology launch ai retail lab the co... 8 565 0.021 0.872 0.107 0.9874 Positive 0 0 0 [college, technology, launch, ai, retail, lab,... college technology launch ai retail lab the co... 277 [(student, 0.026801741568129638), (enrollment,...
2 2022-06-09 child actor claude jarman jr look back movie c... child actor claude jarman jr look back movie c... 14 1720 0.073 0.724 0.204 0.9998 Positive 1 1 0 [child, actor, claude, jarman, jr, look, back,... child actor claude jarman jr look back movie c... 64 [(humphrey, 0.03770472885972689), (claude, 0.0...
3 2020-04-17 cr bard inc ha return since smartrend recommen... cr bard inc ha return since smartrend recommen... 11 282 0.014 0.841 0.145 0.9871 Positive 0 0 0 [cr, bard, inc, ha, return, since, smartrend, ... cr bard inc ha return since smartrend recommen... 1 [(market, 0.010140089305656881), (analysis, 0....
4 2021-02-25 multicoreware inc becomes cevas trust partner ... multicoreware inc becomes cevas trust partner ... 12 588 0.021 0.809 0.170 0.9979 Positive 0 0 0 [multicoreware, inc, becomes, cevas, trust, pa... multicoreware inc becomes cevas trust partner ... 44 [(ip, 0.023631064706047734), (ipsoc, 0.0132072...
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
189969 2022-09-02 metas ai chatbot hate mark zuckerberg less bot... metas ai chatbot hate mark zuckerberg less bot... 10 1585 0.045 0.792 0.162 0.9988 Positive 0 0 0 [metas, ai, chatbot, hate, mark, zuckerberg, l... metas ai chatbot hate mark zuckerberg less bot... 5 [(chatgpt, 0.009597511471804692), (gpt4, 0.007...
189970 2022-03-20 florida panther acquire flyer captain claude g... florida panther acquire flyer captain claude g... 14 644 0.066 0.830 0.105 0.8628 Positive 0 0 0 [florida, panther, acquire, flyer, captain, cl... florida panther acquire flyer captain claude g... 68 [(giroux, 0.05158238507480131), (flyer, 0.0478...
189971 2020-12-04 google ai researcher exit spark ethic bias con... google ai researcher exit spark ethic bias con... 11 999 0.059 0.783 0.158 0.9953 Positive 0 0 0 [google, ai, researcher, exit, spark, ethic, b... google ai researcher exit spark ethic bias con... -1 [(ai, 0.0015218465570483101), (gray, 0.0014727...
189972 2023-02-22 vanderbilt university official review chatgptw... vanderbilt university official review chatgptw... 12 968 0.066 0.793 0.142 0.9932 Positive 0 0 0 [vanderbilt, university, official, review, cha... vanderbilt university official review chatgptw... 341 [(vanderbilt, 0.03399016691635063), (benbow, 0...
189973 2023-02-07 google look challenge chatgpt microsoft bard p... google look challenge chatgpt microsoft bard p... 8 645 0.106 0.708 0.186 0.9524 Positive 0 0 0 [google, look, challenge, chatgpt, microsoft, ... google look challenge chatgpt microsoft bard p... 13 [(bard, 0.0325175172094128), (google, 0.014325...

172309 rows × 17 columns

In [41]:
mod_BERT_pos.visualize_topics()
In [60]:
print("Number of topics:", mod_BERT_pos.get_topic_freq().shape[0])
Number of topics: 60
In [67]:
positive_sentiments.to_csv('gs://nlp_final_ss/Positive_BERTopics_Sentiments.csv',index=False)

Bert Topics on Negative News Articles¶

In [42]:
negative_sentiments = df[df['sentiment'] == 'Negative']
negative_sentiments.shape
Out[42]:
(10227, 14)
In [72]:
negative_sentiments.sample(5)
Out[72]:
date clean_lem_title clean_lem_text title_length text_length neg neu pos compound sentiment sentiments_SVM Sentiments_LR Sentiments_NB text_tokens text_tokens_string Bert_topics Bert_topics_words new_Bert_topics new_Bert_topics_words
144037 2023-05-26 warning power artificial intelligence godfather ai ypr warning power artificial intelligence godfather ai ypr search query show search news regional news npr news the worm the session regional news npr news the worm the session program weekly schedule news talk schedule dead night flavor under the big sky county resounds field day chrysti wordsmith weekly schedule news talk schedule dead night flavor under the big sky county resounds field day chrysti wordsmith support sustain pledge one time pledge update info vehicle donation business support ... 7 547 0.211 0.672 0.117 -0.9993 Negative 0 0 0 [warning, power, artificial, intelligence, godfather, ai, ypr, search, query, show, search, news, regional, news, npr, news, the, worm, the, session, regional, news, npr, news, the, worm, the, session, program, weekly, schedule, news, talk, schedule, dead, night, flavor, under, the, big, sky, county, resounds, field, day, chrysti, wordsmith, weekly, schedule, news, talk, schedule, dead, night, flavor, under, the, big, sky, county, resounds, field, day, chrysti, wordsmith, support, sustain, p... warning power artificial intelligence godfather ai ypr search query show search news regional news npr news the worm the session regional news npr news the worm the session program weekly schedule news talk schedule dead night flavor under the big sky county resounds field day chrysti wordsmith weekly schedule news talk schedule dead night flavor under the big sky county resounds field day chrysti wordsmith support sustain pledge one time pledge update info vehicle donation business support ... -1 [(ai, 0.017137036260213664), (the, 0.013858771488878598), (news, 0.012329264511106698), (new, 0.010745623813679906), (use, 0.010175887657488954), (say, 0.009991610194330772), (technology, 0.00954382264301549), (data, 0.008967752709379285), (company, 0.008776755643810932), (business, 0.008240774870109104)] 5 [(radio, 0.025601460220662987), (npr, 0.024728194221050617), (schedule, 0.022859831764487287), (donate, 0.01683511861676043), (listen, 0.01608025759738875), (classical, 0.01474993688050423), (program, 0.014714258114866154), (public, 0.014568540194826446), (volunteer, 0.014414343884374218), (air, 0.014184828467437545)]
95429 2022-11-04 magellan ai verify thirdparty attribution partner spreaker iheart magellan ai verify thirdparty attribution partner spreaker iheart skip content5050 restaurant guidethe pledge allegiancelivetv schedulehurricane ushomepagedownload appsshare your picsrecent videoswatch heroesvail silencejeff davis forecastriver stagessportssubmit photossports person weektwo dayshigh statehurricane health dividecommunitygas price trackerheart scheduleabout uslatest country music tvgray dccpress releasesmagellan ai verify thirdparty attribution partner spreaker iheartpublished... 8 373 0.103 0.798 0.099 -0.9733 Negative 0 0 0 [magellan, ai, verify, thirdparty, attribution, partner, spreaker, iheart, skip, content5050, restaurant, guidethe, pledge, allegiancelivetv, schedulehurricane, ushomepagedownload, appsshare, your, picsrecent, videoswatch, heroesvail, silencejeff, davis, forecastriver, stagessportssubmit, photossports, person, weektwo, dayshigh, statehurricane, health, dividecommunitygas, price, trackerheart, scheduleabout, uslatest, country, music, tvgray, dccpress, releasesmagellan, ai, verify, thirdparty,... magellan ai verify thirdparty attribution partner spreaker iheart skip content5050 restaurant guidethe pledge allegiancelivetv schedulehurricane ushomepagedownload appsshare your picsrecent videoswatch heroesvail silencejeff davis forecastriver stagessportssubmit photossports person weektwo dayshigh statehurricane health dividecommunitygas price trackerheart scheduleabout uslatest country music tvgray dccpress releasesmagellan ai verify thirdparty attribution partner spreaker iheartpublished... -1 [(ai, 0.017137036260213664), (the, 0.013858771488878598), (news, 0.012329264511106698), (new, 0.010745623813679906), (use, 0.010175887657488954), (say, 0.009991610194330772), (technology, 0.00954382264301549), (data, 0.008967752709379285), (company, 0.008776755643810932), (business, 0.008240774870109104)] -1 [(ai, 0.005009518981689281), (the, 0.0044896498561899445), (data, 0.00398180657354146), (market, 0.0038157315091556674), (technology, 0.0037888504275099136), (new, 0.0037830048186198056), (company, 0.0037728531523221107), (say, 0.0036115974917125745), (use, 0.003536299833760029), (news, 0.00352722409158165)]
145005 2023-04-11 alphasense raise 100m lead by alphabet capitalg fuel innovation ai market intelligence alphasense raise 100m lead by alphabet capitalg fuel innovation ai market intelligence skip contentsky camsbusiness partnerscommunity calendartop teacherlive healthybooks break afternoon scheduledownload our appssubmit photo centergas newsmurdaugh casenationalquinton simon casestephen smith casetrafficwtoc investigatesfirst alert weatherastronomical tide and river centersky camssummer weather tipswhat is first alert weather reportend zonegolfghost piratesplay healthybirthday clubbooks kidsca... 12 651 0.142 0.735 0.123 -0.9727 Negative 0 0 0 [alphasense, raise, 100m, lead, by, alphabet, capitalg, fuel, innovation, ai, market, intelligence, skip, contentsky, camsbusiness, partnerscommunity, calendartop, teacherlive, healthybooks, break, afternoon, scheduledownload, our, appssubmit, photo, centergas, newsmurdaugh, casenationalquinton, simon, casestephen, smith, casetrafficwtoc, investigatesfirst, alert, weatherastronomical, tide, and, river, centersky, camssummer, weather, tipswhat, is, first, alert, weather, reportend, zonegolfgh... alphasense raise 100m lead by alphabet capitalg fuel innovation ai market intelligence skip contentsky camsbusiness partnerscommunity calendartop teacherlive healthybooks break afternoon scheduledownload our appssubmit photo centergas newsmurdaugh casenationalquinton simon casestephen smith casetrafficwtoc investigatesfirst alert weatherastronomical tide and river centersky camssummer weather tipswhat is first alert weather reportend zonegolfghost piratesplay healthybirthday clubbooks kidsca... 4 [(gray, 0.02364868084561796), (ai, 0.021262426009838183), (group, 0.01855959793207819), (platform, 0.017687752055991737), (data, 0.01717251726215466), (medium, 0.016064153080916695), (customer, 0.01568195444590948), (prnewswire, 0.013425164969089265), (press, 0.013038151930951095), (company, 0.012913612704479053)] -1 [(ai, 0.005009518981689281), (the, 0.0044896498561899445), (data, 0.00398180657354146), (market, 0.0038157315091556674), (technology, 0.0037888504275099136), (new, 0.0037830048186198056), (company, 0.0037728531523221107), (say, 0.0036115974917125745), (use, 0.003536299833760029), (news, 0.00352722409158165)]
101899 2023-01-16 chatgpt help hacker write malicious code steal personal data cio news et cio chatgpt help hacker write malicious code steal personal data cio news et cio sign insign news news internet thing security nextgen technology cloud compute business analytics strategy management big data mobility service apps consumer tech data center case study corporate social medium policy internet industry industry healthcare automotive manufacture financial service retail itites banking case study case study digital transformation analytics ai rpa iot customer experience datacenter clou... 13 720 0.148 0.754 0.098 -0.9393 Negative 0 0 0 [chatgpt, help, hacker, write, malicious, code, steal, personal, data, cio, news, et, cio, sign, insign, news, news, internet, thing, security, nextgen, technology, cloud, compute, business, analytics, strategy, management, big, data, mobility, service, apps, consumer, tech, data, center, case, study, corporate, social, medium, policy, internet, industry, industry, healthcare, automotive, manufacture, financial, service, retail, itites, banking, case, study, case, study, digital, transformat... chatgpt help hacker write malicious code steal personal data cio news et cio sign insign news news internet thing security nextgen technology cloud compute business analytics strategy management big data mobility service apps consumer tech data center case study corporate social medium policy internet industry industry healthcare automotive manufacture financial service retail itites banking case study case study digital transformation analytics ai rpa iot customer experience datacenter clou... -1 [(ai, 0.017137036260213664), (the, 0.013858771488878598), (news, 0.012329264511106698), (new, 0.010745623813679906), (use, 0.010175887657488954), (say, 0.009991610194330772), (technology, 0.00954382264301549), (data, 0.008967752709379285), (company, 0.008776755643810932), (business, 0.008240774870109104)] -1 [(ai, 0.005009518981689281), (the, 0.0044896498561899445), (data, 0.00398180657354146), (market, 0.0038157315091556674), (technology, 0.0037888504275099136), (new, 0.0037830048186198056), (company, 0.0037728531523221107), (say, 0.0036115974917125745), (use, 0.003536299833760029), (news, 0.00352722409158165)]
156440 2023-05-07 world news iran us artificial intelligence maximize suppression expert latestly world news iran us artificial intelligence maximize suppression expert latestly advertisement live break news tiger hide sugarcane field leap into air catch fly drone uttar pradeshs sitapur scary video surface jos buttler wicket video rajasthan royal batsman miss out hundred get dismiss during rr srh ipl match incest in peace un make huge blunder tweet promote peace spark memefest online check funny meme joke english sunday may late story minute ago jos buttler wicket video rajasthan royal b... 10 1581 0.163 0.665 0.172 -0.8419 Negative 0 0 0 [world, news, iran, us, artificial, intelligence, maximize, suppression, expert, latestly, advertisement, live, break, news, tiger, hide, sugarcane, field, leap, into, air, catch, fly, drone, uttar, pradeshs, sitapur, scary, video, surface, jos, buttler, wicket, video, rajasthan, royal, batsman, miss, out, hundred, get, dismiss, during, rr, srh, ipl, match, incest, in, peace, un, make, huge, blunder, tweet, promote, peace, spark, memefest, online, check, funny, meme, joke, english, sunday, m... world news iran us artificial intelligence maximize suppression expert latestly advertisement live break news tiger hide sugarcane field leap into air catch fly drone uttar pradeshs sitapur scary video surface jos buttler wicket video rajasthan royal batsman miss out hundred get dismiss during rr srh ipl match incest in peace un make huge blunder tweet promote peace spark memefest online check funny meme joke english sunday may late story minute ago jos buttler wicket video rajasthan royal b... 5 [(india, 0.02726375669099563), (news, 0.01626600625368251), (the, 0.013147455512208642), (ai, 0.0131171324271287), (say, 0.012978231377161618), (world, 0.012379546176493977), (sport, 0.011834340315342728), (indian, 0.010738636148648471), (video, 0.010566227707773499), (technology, 0.009398199552063896)] 33 [(india, 0.024569799889329225), (indian, 0.010357795432888053), (viral, 0.01004074684062756), (delhi, 0.00964573136531214), (from, 0.008946224838185916), (to, 0.008678885826372185), (air, 0.008235651508916347), (video, 0.008216694320890821), (flight, 0.008091139372302235), (watch, 0.007823786247833786)]
In [54]:
negative_sentiments.dtypes
Out[54]:
date               datetime64[ns]
clean_lem_title            object
clean_lem_text             object
title_length                int64
text_length                 int64
neg                       float64
neu                       float64
pos                       float64
compound                  float64
sentiment                  object
sentiments_SVM              int64
Sentiments_LR               int64
Sentiments_NB               int64
text_tokens                object
dtype: object
In [43]:
negative_sentiments['text_tokens_string'] = negative_sentiments['text_tokens'].apply(' '.join)
In [78]:
%%time
mod_BERT_neg = BERTopic(calculate_probabilities=True, verbose=True)
topics_neg, probabilities_neg = mod_BERT_neg.fit_transform(negative_sentiments['text_tokens_string'].tolist())
Batches:   0%|          | 0/320 [00:00<?, ?it/s]
2023-08-15 18:53:04,293 - BERTopic - Transformed documents to Embeddings
2023-08-15 18:53:14,706 - BERTopic - Reduced dimensionality
2023-08-15 18:53:28,029 - BERTopic - Clustered reduced embeddings
CPU times: user 14h 6min 15s, sys: 18min 3s, total: 14h 24min 19s
Wall time: 11min 53s
In [79]:
print("Number of topics:", mod_BERT_neg.get_topic_freq().shape[0])
Number of topics: 172
In [87]:
mod_BERT_neg.get_topic_info()
Out[87]:
Topic Count Name Representation Representative_Docs
0 -1 3900 -1_ai_the_technology_new [ai, the, technology, new, data, company, news, use, say, business] [global artificial intelligence marketing market competitive analysis market entry strategy price trend sustainability trend ksu the sentinel newspaper wednesday february break news 3d print gas market impact of covid19 future growth analysis and challenge analysis to lead player basf se linde air liquide praxair technology ceramified cable market late trend demand huge business opportunity entacapone market growth accelerate rapid pace due innovative strategy key player fermion suven life s...
1 0 375 0_market_report_analysis_growth [market, report, analysis, growth, global, artificial, intelligence, forecast, key, player] [artificial intelligence ai service market key player product production information analysis forecast technewsmobi market report skip content technewsmobi market report report technology market space electric news nasa satellite climate market forecast industry analysis market report contact space electric news nasa satellite climate market forecast industry analysis market report contact artificial intelligence ai service market key player product production information analysis forecast b...
2 1 359 1_chatgpt_student_write_chatbot [chatgpt, student, write, chatbot, use, openai, question, answer, user, like] [how use chatgpt la late news home business entertainment health news sport world technology search home technology how use chatgpt la stampa technology how use chatgpt la stampa admin february february chatgpt new example artificial intelligencethis generative able write text even produce image human would how write like man through socalled llm field ai research mean large language model ie deep learn algorithm generally train million text in way ai able predict word likely follow another ...
3 2 221 2_gray_group_platform_prnewswire [gray, group, platform, prnewswire, alert, medium, press, data, solution, inc] [edx launch groundbreaking ai microbootcamp partnership lead university skip contentmedically speakinghome prochampions agbest our appscovid19food newstexasnew tiplatest camsclosings delayssend your weather photostxdot highway conditionsdownload weather appweather resourceskcbd investigatessubmit tipchad read shootingreagor dyke coveragesex traffic south plainslubbock county medical examiner school beat petestats predictionshow watchcommunitytell me something goodnotebookpay it forwardcommun...
4 3 172 3_ago_hour_weather_video [ago, hour, weather, video, story, bestreviews, file, app, day, more] [washington tackle potential risk ai klbk kamc skip content klbk kamc lubbock sign up lubbock sponsor by toggle menu open navigation close navigation search please enter search term primary menu news local news video center kamc news video stream klbk news video stream state regional state state national washingtondc politics the hill trend friend border report late automotive news press release top story miss teen 3yearold hockley county top story animal rescue crosby county host dog day su...
... ... ... ... ... ...
167 166 11 166_wpsu_pb_volunteer_whqr [wpsu, pb, volunteer, whqr, live, pledgejoinreview, keystone, schedule, kid, board] [new tool help teacher detect ai write assignment wpsu search query show search home tv highlight video program schedule passport wpsu live stream pb video pb world pb create pb kid live highlight video program schedule passport wpsu live stream pb video pb world pb create pb kid live radio local news program schedule frequency contact local news program schedule frequency contact digital studio community event calendar volunteer community engagement event calendar volunteer community engage...
168 167 11 167_alzheimers_dementia_disease_cognitive [alzheimers, dementia, disease, cognitive, memory, patient, diagnosis, predict, study, could] [what dementia ai study show million old adult india likely dementia check early symptom mint explore sign epaper friday march stock mutual fund news home late news market premium money mutual fund industry company technology opinion web story video sign in my account subscribesearchmy readsepapernew notification newsletter ifsc code findernew web storiesmintgenie for you sme top section news company company news startup company result top company leader technology gadget tech review app new...
169 168 11 168_orleans_suarez_mayor_wgno [orleans, suarez, mayor, wgno, suarezs, parish, miami, presidential, francis, campaign] [mayor suarez launch artificial intelligence chatbot presidential campaign log nation mayor suarez launch artificial intelligence chatbot presidential campaign listen tap bookmark save article view save article gift article send article anyone subscription necessary view email copy link tap bookmark save article view save article gift article anyone read subscription require email copy link by ali swenson associate press july pm see story new york super pac support miami mayor francis suarez...
170 169 10 169_cision_all_overview_general [cision, all, overview, general, resource, entertainment, overviewview, product, consumer, lunit] [mim software inc receives health canada approval ai autocontouring software resource blog journalist log in sign up data privacy send release news product contact search search when type field list search result appear automatically update type search content no result found please change search term try news focus browse news release all news release all public company englishonly news release overview multimedia gallery all multimedia all photo all video multimedia gallery overview trend ...
171 170 10 170_safetylit_aa_injury_anxiety [safetylit, aa, injury, anxiety, disorder, doi, inserm, adolescence, citation, bulletin] [safetylit predict selfharm within six month initial presentation youth mental health service machine learn study home search boolean search thesaurus source author weekly update update bulletin pdf update bulletin web about my safetylit aa aa aa safetylit weekly update we compile citation summary new article every week email signup feed help tutorial faq contact contact info safetylit service search result journal article predict selfharm within six month initial presentation youth mental h...

172 rows × 5 columns

In [98]:
negative_topic_info_df = pd.DataFrame(mod_BERT_neg.get_topic_info())
print(negative_topic_info_df.shape)
negative_topic_info_df.head()
(172, 5)
Out[98]:
Topic Count Name Representation Representative_Docs
0 -1 3900 -1_ai_the_technology_new [ai, the, technology, new, data, company, news, use, say, business] [global artificial intelligence marketing market competitive analysis market entry strategy price trend sustainability trend ksu the sentinel newspaper wednesday february break news 3d print gas market impact of covid19 future growth analysis and challenge analysis to lead player basf se linde air liquide praxair technology ceramified cable market late trend demand huge business opportunity entacapone market growth accelerate rapid pace due innovative strategy key player fermion suven life s...
1 0 375 0_market_report_analysis_growth [market, report, analysis, growth, global, artificial, intelligence, forecast, key, player] [artificial intelligence ai service market key player product production information analysis forecast technewsmobi market report skip content technewsmobi market report report technology market space electric news nasa satellite climate market forecast industry analysis market report contact space electric news nasa satellite climate market forecast industry analysis market report contact artificial intelligence ai service market key player product production information analysis forecast b...
2 1 359 1_chatgpt_student_write_chatbot [chatgpt, student, write, chatbot, use, openai, question, answer, user, like] [how use chatgpt la late news home business entertainment health news sport world technology search home technology how use chatgpt la stampa technology how use chatgpt la stampa admin february february chatgpt new example artificial intelligencethis generative able write text even produce image human would how write like man through socalled llm field ai research mean large language model ie deep learn algorithm generally train million text in way ai able predict word likely follow another ...
3 2 221 2_gray_group_platform_prnewswire [gray, group, platform, prnewswire, alert, medium, press, data, solution, inc] [edx launch groundbreaking ai microbootcamp partnership lead university skip contentmedically speakinghome prochampions agbest our appscovid19food newstexasnew tiplatest camsclosings delayssend your weather photostxdot highway conditionsdownload weather appweather resourceskcbd investigatessubmit tipchad read shootingreagor dyke coveragesex traffic south plainslubbock county medical examiner school beat petestats predictionshow watchcommunitytell me something goodnotebookpay it forwardcommun...
4 3 172 3_ago_hour_weather_video [ago, hour, weather, video, story, bestreviews, file, app, day, more] [washington tackle potential risk ai klbk kamc skip content klbk kamc lubbock sign up lubbock sponsor by toggle menu open navigation close navigation search please enter search term primary menu news local news video center kamc news video stream klbk news video stream state regional state state national washingtondc politics the hill trend friend border report late automotive news press release top story miss teen 3yearold hockley county top story animal rescue crosby county host dog day su...
In [96]:
negative_topic_info_df.to_parquet('negative_topic_info_df.parquet')

bucket_name = 'nlp_final_ss'
file_path = 'negative_topic_info_df.parquet'

storage_client = storage.Client()
bucket = storage_client.get_bucket(bucket_name)
blob = bucket.blob(file_path)
blob.upload_from_filename(file_path)
In [81]:
%%time
negative_sentiments['new_Bert_topics']=mod_BERT_neg.topics_
negative_sentiments['new_Bert_topics_words'] = negative_sentiments['new_Bert_topics'].parallel_apply(lambda x: mod_BERT_neg.get_topic(x))
CPU times: user 639 ms, sys: 47.4 s, total: 48 s
Wall time: 49.1 s
In [82]:
print("Number of topics:", mod_BERT_neg.get_topic_freq().shape[0])
mod_BERT_neg.visualize_topics()
Number of topics: 172
In [77]:
mod_BERT_neg.visualize_topics()
In [63]:
print("Number of topics:", mod_BERT_neg.get_topic_freq().shape[0])
Number of topics: 29
In [48]:
mod_BERT_neg.visualize_topics()
In [ ]:
negative_sentiments.head()
Out[ ]:
date clean_lem_title clean_lem_text title_length text_length neg neu pos compound sentiment sentiments_SVM Sentiments_LR Sentiments_NB text_tokens text_tokens_string Bert_topics Bert_topics_words new_Bert_topics new_Bert_topics_words
77 2021-12-20 overjet sink teeth another round capital dental ai tech overjet sink teeth another round capital dental ai tech home mail news finance sport entertainment life shopping yahoo plus more yahoo finance sign mail sign view mail finance finance watchlists watchlists my portfolio my portfolio screener screener save screenerssaved screener equity screenerequity screener mutual fund screenermutual fund screener etf screeneretf screener future screenerfutures screener index screenerindex screener yahoo finance plus yahoo finance plus dashboarddashboard re... 9 1642 0.139 0.731 0.130 -0.9905 Negative 0 0 0 [overjet, sink, teeth, another, round, capital, dental, ai, tech, home, mail, news, finance, sport, entertainment, life, shopping, yahoo, plus, more, yahoo, finance, sign, mail, sign, view, mail, finance, finance, watchlists, watchlists, my, portfolio, my, portfolio, screener, screener, save, screenerssaved, screener, equity, screenerequity, screener, mutual, fund, screenermutual, fund, screener, etf, screeneretf, screener, future, screenerfutures, screener, index, screenerindex, screener, y... overjet sink teeth another round capital dental ai tech home mail news finance sport entertainment life shopping yahoo plus more yahoo finance sign mail sign view mail finance finance watchlists watchlists my portfolio my portfolio screener screener save screenerssaved screener equity screenerequity screener mutual fund screenermutual fund screener etf screeneretf screener future screenerfutures screener index screenerindex screener yahoo finance plus yahoo finance plus dashboarddashboard re... 2 [(stock, 0.044858893312774296), (trade, 0.021346022164199948), (nasdaq, 0.01793313270913545), (market, 0.016666330953797784), (trading, 0.016397712300894516), (etf, 0.01563330121373921), (ai, 0.015260485818267666), (company, 0.01482754523801245), (price, 0.01458658237366724), (share, 0.013789107014640908)] 40 [(screener, 0.03534953241403071), (yahoo, 0.021245574120685406), (finance, 0.02121403073928488), (stock, 0.019293243996420326), (zacks, 0.015123429610789293), (etf, 0.010387093758720479), (fund, 0.008750480251736137), (mutual, 0.00831376977214047), (earnings, 0.00785989400135487), (crypto, 0.007719804706106513)]
138 2023-02-01 versus system stock soar ai deal play off chatgpt buzz versus system stock soar ai deal play off chatgpt buzz jump main content search account menu icon vertical stack three evenly space horizontal line insider logo the word insider market insider logo the word market insider search market search icon magnify glass it indicates click perform search my watchlist business the word business life the word life news the word news review the word review market stock index commodity cryptocurrencies currency etf news calendar icon an icon shape calenda... 10 814 0.144 0.745 0.111 -0.9950 Negative 0 0 0 [versus, system, stock, soar, ai, deal, play, off, chatgpt, buzz, jump, main, content, search, account, menu, icon, vertical, stack, three, evenly, space, horizontal, line, insider, logo, the, word, insider, market, insider, logo, the, word, market, insider, search, market, search, icon, magnify, glass, it, indicates, click, perform, search, my, watchlist, business, the, word, business, life, the, word, life, news, the, word, news, review, the, word, review, market, stock, index, commodity, ... versus system stock soar ai deal play off chatgpt buzz jump main content search account menu icon vertical stack three evenly space horizontal line insider logo the word insider market insider logo the word market insider search market search icon magnify glass it indicates click perform search my watchlist business the word business life the word life news the word news review the word review market stock index commodity cryptocurrencies currency etf news calendar icon an icon shape calenda... 2 [(stock, 0.044858893312774296), (trade, 0.021346022164199948), (nasdaq, 0.01793313270913545), (market, 0.016666330953797784), (trading, 0.016397712300894516), (etf, 0.01563330121373921), (ai, 0.015260485818267666), (company, 0.01482754523801245), (price, 0.01458658237366724), (share, 0.013789107014640908)] 128 [(stock, 0.05061347303473834), (retirement, 0.03781073363594823), (fool, 0.033465440597503976), (invest, 0.030897951973290316), (motley, 0.030135688318770042), (best, 0.02662491289306415), (marketwatch, 0.025002178058159248), (icon, 0.02310325441647596), (card, 0.020257726958523917), (retire, 0.019973224338637408)]
152 2020-11-23 global manmade vascular graft market overview industry top manufacture getinge bard pv terumo neptune pine global manmade vascular graft market overview industry top manufacture getinge bard pv terumo neptune pine about usbecome contributorcontact usour teamprivacy policy neptune releasetechnology global manmade vascular graft market overview industry top manufacture getinge bard pv terumo post november richard corley comments0 this manmade vascular graft market report researcher industry size value capacity production consumption key region like usa europe japan china india south east asia and o... 15 1079 0.208 0.714 0.078 -0.9959 Negative 0 0 0 [global, manmade, vascular, graft, market, overview, industry, top, manufacture, getinge, bard, pv, terumo, neptune, pine, about, usbecome, contributorcontact, usour, teamprivacy, policy, neptune, releasetechnology, global, manmade, vascular, graft, market, overview, industry, top, manufacture, getinge, bard, pv, terumo, post, november, richard, corley, comments0, this, manmade, vascular, graft, market, report, researcher, industry, size, value, capacity, production, consumption, key, region... global manmade vascular graft market overview industry top manufacture getinge bard pv terumo neptune pine about usbecome contributorcontact usour teamprivacy policy neptune releasetechnology global manmade vascular graft market overview industry top manufacture getinge bard pv terumo post november richard corley comments0 this manmade vascular graft market report researcher industry size value capacity production consumption key region like usa europe japan china india south east asia and o... 0 [(market, 0.06417881022042422), (report, 0.030111541782806735), (analysis, 0.028195317075145887), (global, 0.026261051484429674), (intelligence, 0.025250843110517316), (artificial, 0.024498909756108068), (growth, 0.022859860466435514), (forecast, 0.020362313150523642), (research, 0.019701241797911415), (key, 0.0181895010370908)] 39 [(catheter, 0.05180562897705263), (vascular, 0.04166311923621422), (stent, 0.036904104806252135), (market, 0.03348024337962619), (peripheral, 0.022699642530120714), (biliary, 0.02224146840749923), (analysis, 0.021935789071298385), (report, 0.02139841123985318), (global, 0.0198155484113862), (device, 0.019730509755781257)]
166 2023-05-19 uk telecom company bt plan shed job replace ai phl17com uk telecom company bt plan shed job replace ai phl17com skip content phl17com philadelphia watch now watch now phl17 morning news sign up philadelphia watch now sponsor by toggle menu open navigation close navigation search please enter search term primary menu news nexstar medium wire national news entertainment health sport bestreviews bestreviews daily deal politics the hill pa election result phl17 news email newsletter signup phl17 morning news story philadelphia mayoral forum april 18t... 10 759 0.069 0.931 0.000 -0.1027 Negative 0 0 0 [uk, telecom, company, bt, plan, shed, job, replace, ai, phl17com, skip, content, phl17com, philadelphia, watch, now, watch, now, phl17, morning, news, sign, up, philadelphia, watch, now, sponsor, by, toggle, menu, open, navigation, close, navigation, search, please, enter, search, term, primary, menu, news, nexstar, medium, wire, national, news, entertainment, health, sport, bestreviews, bestreviews, daily, deal, politics, the, hill, pa, election, result, phl17, news, email, newsletter, sig... uk telecom company bt plan shed job replace ai phl17com skip content phl17com philadelphia watch now watch now phl17 morning news sign up philadelphia watch now sponsor by toggle menu open navigation close navigation search please enter search term primary menu news nexstar medium wire national news entertainment health sport bestreviews bestreviews daily deal politics the hill pa election result phl17 news email newsletter signup phl17 morning news story philadelphia mayoral forum april 18t... -1 [(ai, 0.017137036260213664), (the, 0.013858771488878598), (news, 0.012329264511106698), (new, 0.010745623813679906), (use, 0.010175887657488954), (say, 0.009991610194330772), (technology, 0.00954382264301549), (data, 0.008967752709379285), (company, 0.008776755643810932), (business, 0.008240774870109104)] -1 [(ai, 0.00532199048445284), (the, 0.004504404967411157), (technology, 0.00394335979108206), (new, 0.003894957526870397), (data, 0.0038456304425192774), (company, 0.0038037169956861455), (news, 0.0037269339370107134), (use, 0.0037263831443195145), (say, 0.003711918306034191), (business, 0.0035777241157662807)]
180 2020-03-26 lawrence livermore lab us artificial intelligence supercomputer global fight against covid19 lawrence livermore lab us artificial intelligence supercomputer global fight against covid19 home about contact newsletter resource facebook twitter youtube pinterest newsletter biology chemistry earth health physic science space technology hot topic march lawrence livermore lab us artificial intelligence supercomputer global fight against covid19 march new dinosaur discover new mexico is one last know survive raptor march unlock genetic code novel coronavirus how covid19 make leap from anim... 11 1200 0.097 0.816 0.087 -0.8316 Negative 0 1 0 [lawrence, livermore, lab, us, artificial, intelligence, supercomputer, global, fight, against, covid19, home, about, contact, newsletter, resource, facebook, twitter, youtube, pinterest, newsletter, biology, chemistry, earth, health, physic, science, space, technology, hot, topic, march, lawrence, livermore, lab, us, artificial, intelligence, supercomputer, global, fight, against, covid19, march, new, dinosaur, discover, new, mexico, is, one, last, know, survive, raptor, march, unlock, gene... lawrence livermore lab us artificial intelligence supercomputer global fight against covid19 home about contact newsletter resource facebook twitter youtube pinterest newsletter biology chemistry earth health physic science space technology hot topic march lawrence livermore lab us artificial intelligence supercomputer global fight against covid19 march new dinosaur discover new mexico is one last know survive raptor march unlock genetic code novel coronavirus how covid19 make leap from anim... 0 [(market, 0.06417881022042422), (report, 0.030111541782806735), (analysis, 0.028195317075145887), (global, 0.026261051484429674), (intelligence, 0.025250843110517316), (artificial, 0.024498909756108068), (growth, 0.022859860466435514), (forecast, 0.020362313150523642), (research, 0.019701241797911415), (key, 0.0181895010370908)] -1 [(ai, 0.00532199048445284), (the, 0.004504404967411157), (technology, 0.00394335979108206), (new, 0.003894957526870397), (data, 0.0038456304425192774), (company, 0.0038037169956861455), (news, 0.0037269339370107134), (use, 0.0037263831443195145), (say, 0.003711918306034191), (business, 0.0035777241157662807)]
In [ ]:
negative_sentiments.to_csv('gs://nlp_final_ss/Negtive_BERTopics_Sentiments.csv',index=False)

Positive Sentiment Ananlysis over Time¶

In [ ]:
positive_sentiments[['Bert_topics','Bert_topics_words']]
Out[ ]:
Bert_topics Bert_topics_words
0 -1 [(ai, 0.0015218465570483101), (gray, 0.0014727...
1 277 [(student, 0.026801741568129638), (enrollment,...
2 64 [(humphrey, 0.03770472885972689), (claude, 0.0...
3 1 [(market, 0.010140089305656881), (analysis, 0....
4 44 [(ip, 0.023631064706047734), (ipsoc, 0.0132072...
... ... ...
189969 5 [(chatgpt, 0.009597511471804692), (gpt4, 0.007...
189970 68 [(giroux, 0.05158238507480131), (flyer, 0.0478...
189971 -1 [(ai, 0.0015218465570483101), (gray, 0.0014727...
189972 341 [(vanderbilt, 0.03399016691635063), (benbow, 0...
189973 13 [(bard, 0.0325175172094128), (google, 0.014325...

172309 rows × 2 columns

In [ ]:
positive_sentiments['Bert_topics'].value_counts()
Out[ ]:
-1      63170
 0       2516
 1       2229
 2       2136
 3       1630
        ...  
 657       50
 658       50
 659       50
 660       50
 661       50
Name: Bert_topics, Length: 663, dtype: int64
In [ ]:
positive_sentiments[positive_sentiments['Bert_topics'] == 1].head(1)
Out[ ]:
date clean_lem_title clean_lem_text title_length text_length neg neu pos compound sentiment sentiments_SVM Sentiments_LR Sentiments_NB text_tokens text_tokens_string Bert_topics Bert_topics_words
3 2020-04-17 cr bard inc ha return since smartrend recommen... cr bard inc ha return since smartrend recommen... 11 282 0.014 0.841 0.145 0.9871 Positive 0 0 0 [cr, bard, inc, ha, return, since, smartrend, ... cr bard inc ha return since smartrend recommen... 1 [(market, 0.010140089305656881), (analysis, 0....

AI for market analysis¶

In [ ]:
 
In [ ]:
positive_sentiments[positive_sentiments['Bert_topics'] == 1].sample(n = 5, random_state = 42)
Out[ ]:
date clean_lem_title clean_lem_text title_length text_length neg neu pos compound sentiment sentiments_SVM Sentiments_LR Sentiments_NB text_tokens text_tokens_string Bert_topics Bert_topics_words
134398 2020-08-30 artificial intelligence ai hardware market size covid19 impact analysis top company international business machine corporation ibm mediatek rockwell automation cisco system scientect artificial intelligence ai hardware market size covid19 impact analysis top company international business machine corporation ibm mediatek rockwell automation cisco system scientect skip content menu energy news space uncategorized contact search search scientect menu energy news space uncategorized contact search search flash news electric teapot market size share application analysis regional outlook growth trend key player competitive strategy forecast cusinium tealyra for life design me... 22 1244 0.019 0.749 0.232 0.9991 Positive 0 0 0 [artificial, intelligence, ai, hardware, market, size, covid19, impact, analysis, top, company, international, business, machine, corporation, ibm, mediatek, rockwell, automation, cisco, system, scientect, skip, content, menu, energy, news, space, uncategorized, contact, search, search, scientect, menu, energy, news, space, uncategorized, contact, search, search, flash, news, electric, teapot, market, size, share, application, analysis, regional, outlook, growth, trend, key, player, competit... artificial intelligence ai hardware market size covid19 impact analysis top company international business machine corporation ibm mediatek rockwell automation cisco system scientect skip content menu energy news space uncategorized contact search search scientect menu energy news space uncategorized contact search search flash news electric teapot market size share application analysis regional outlook growth trend key player competitive strategy forecast cusinium tealyra for life design me... 1 [(market, 0.010044159804921245), (analysis, 0.008434842588608995), (growth, 0.007836588529166978), (player, 0.007496387440828896), (global, 0.007176008058518001), (forecast, 0.0070959986486200215), (key, 0.006787235603997208), (corporation, 0.006244465622931062), (size, 0.006121891481947713), (report, 0.0060881454826161975)]
56506 2020-09-12 artificial intelligence ai agricultural market research report cover update data consider impact covid19 share size future demand microsoft corp ibm corp niti aayog galus australis artificial intelligence ai agricultural market research report cover update data consider impact covid19 share size future demand microsoft corp ibm corp niti aayog galus australis galus australis business general news healthcare industry international lifestyle scitech saturday september trend global chronic disease management market observational study by top company like allscripts sciencesoft usa corporation siemens healthcare private limited artificial intelligence ai agricultural marke... 25 886 0.000 0.913 0.087 0.2960 Positive 0 0 0 [artificial, intelligence, ai, agricultural, market, research, report, cover, update, data, consider, impact, covid19, share, size, future, demand, microsoft, corp, ibm, corp, niti, aayog, galus, australis, galus, australis, business, general, news, healthcare, industry, international, lifestyle, scitech, saturday, september, trend, global, chronic, disease, management, market, observational, study, by, top, company, like, allscripts, sciencesoft, usa, corporation, siemens, healthcare, priva... artificial intelligence ai agricultural market research report cover update data consider impact covid19 share size future demand microsoft corp ibm corp niti aayog galus australis galus australis business general news healthcare industry international lifestyle scitech saturday september trend global chronic disease management market observational study by top company like allscripts sciencesoft usa corporation siemens healthcare private limited artificial intelligence ai agricultural marke... 1 [(market, 0.010044159804921245), (analysis, 0.008434842588608995), (growth, 0.007836588529166978), (player, 0.007496387440828896), (global, 0.007176008058518001), (forecast, 0.0070959986486200215), (key, 0.006787235603997208), (corporation, 0.006244465622931062), (size, 0.006121891481947713), (report, 0.0060881454826161975)]
70425 2020-11-29 global artificial intelligence software system market expect to reach high cagr google baidu ibm microsoft sap etc the haitiancaribbean news network global artificial intelligence software system market expect to reach high cagr google baidu ibm microsoft sap etc the haitiancaribbean news network skip content sunday nov break news step step analysis audiobooks market market size top manufacturer type application specification forecast global real time system market expect to reach high cagr stanley healthcare zebra technology teletracking technology ubisense group identec group etc relay market share growth trend and forecast to up marke... 21 1446 0.048 0.781 0.171 0.9982 Positive 0 0 0 [global, artificial, intelligence, software, system, market, expect, to, reach, high, cagr, google, baidu, ibm, microsoft, sap, etc, the, haitiancaribbean, news, network, skip, content, sunday, nov, break, news, step, step, analysis, audiobooks, market, market, size, top, manufacturer, type, application, specification, forecast, global, real, time, system, market, expect, to, reach, high, cagr, stanley, healthcare, zebra, technology, teletracking, technology, ubisense, group, identec, group,... global artificial intelligence software system market expect to reach high cagr google baidu ibm microsoft sap etc the haitiancaribbean news network skip content sunday nov break news step step analysis audiobooks market market size top manufacturer type application specification forecast global real time system market expect to reach high cagr stanley healthcare zebra technology teletracking technology ubisense group identec group etc relay market share growth trend and forecast to up marke... 1 [(market, 0.010044159804921245), (analysis, 0.008434842588608995), (growth, 0.007836588529166978), (player, 0.007496387440828896), (global, 0.007176008058518001), (forecast, 0.0070959986486200215), (key, 0.006787235603997208), (corporation, 0.006244465622931062), (size, 0.006121891481947713), (report, 0.0060881454826161975)]
101001 2021-03-27 global artificial intelligence computer network market report cisco system hewlett packard enterprise hpe ibm corporation samsung electronics co ltd baidu nvidia google microsoft corporation dell nokia corporation etc glendive gazette global artificial intelligence computer network market report cisco system hewlett packard enterprise hpe ibm corporation samsung electronics co ltd baidu nvidia google microsoft corporation dell nokia corporation etc glendive gazette skip content march glendive gazette eenrgy green energy late news space all news contact eenrgy green energy late news space all news contact search break news natural betaine market major technology giant buzz again dupont basf se kao corporation evonik indust... 30 853 0.069 0.760 0.171 0.9983 Positive 0 0 0 [global, artificial, intelligence, computer, network, market, report, cisco, system, hewlett, packard, enterprise, hpe, ibm, corporation, samsung, electronics, co, ltd, baidu, nvidia, google, microsoft, corporation, dell, nokia, corporation, etc, glendive, gazette, skip, content, march, glendive, gazette, eenrgy, green, energy, late, news, space, all, news, contact, eenrgy, green, energy, late, news, space, all, news, contact, search, break, news, natural, betaine, market, major, technology,... global artificial intelligence computer network market report cisco system hewlett packard enterprise hpe ibm corporation samsung electronics co ltd baidu nvidia google microsoft corporation dell nokia corporation etc glendive gazette skip content march glendive gazette eenrgy green energy late news space all news contact eenrgy green energy late news space all news contact search break news natural betaine market major technology giant buzz again dupont basf se kao corporation evonik indust... 1 [(market, 0.010044159804921245), (analysis, 0.008434842588608995), (growth, 0.007836588529166978), (player, 0.007496387440828896), (global, 0.007176008058518001), (forecast, 0.0070959986486200215), (key, 0.006787235603997208), (corporation, 0.006244465622931062), (size, 0.006121891481947713), (report, 0.0060881454826161975)]
80290 2021-02-17 artificial intelligence service aiaas market predict witness sustainable evolution year come ksu the sentinel newspaper artificial intelligence service aiaas market predict witness sustainable evolution year come ksu the sentinel newspaper wednesday february break news global variable displacement axial piston pump market late covid19 impact analysis know about brand player bosch rexroth corporation kawasaki heavy industry fmc technology interpump group annovi reverberi spa package coconut water market increase demand industry share growth industry study pandemic future growth analysis challenge analysis in d... 15 1895 0.084 0.803 0.113 0.9900 Positive 0 0 0 [artificial, intelligence, service, aiaas, market, predict, witness, sustainable, evolution, year, come, ksu, the, sentinel, newspaper, wednesday, february, break, news, global, variable, displacement, axial, piston, pump, market, late, covid19, impact, analysis, know, about, brand, player, bosch, rexroth, corporation, kawasaki, heavy, industry, fmc, technology, interpump, group, annovi, reverberi, spa, package, coconut, water, market, increase, demand, industry, share, growth, industry, stu... artificial intelligence service aiaas market predict witness sustainable evolution year come ksu the sentinel newspaper wednesday february break news global variable displacement axial piston pump market late covid19 impact analysis know about brand player bosch rexroth corporation kawasaki heavy industry fmc technology interpump group annovi reverberi spa package coconut water market increase demand industry share growth industry study pandemic future growth analysis challenge analysis in d... 1 [(market, 0.010044159804921245), (analysis, 0.008434842588608995), (growth, 0.007836588529166978), (player, 0.007496387440828896), (global, 0.007176008058518001), (forecast, 0.0070959986486200215), (key, 0.006787235603997208), (corporation, 0.006244465622931062), (size, 0.006121891481947713), (report, 0.0060881454826161975)]
In [ ]:
# Extract and format month and year
positive_sentiments['Month_Year'] = positive_sentiments['date'].dt.strftime('%b %Y')

# Convert 'Month_Year' column to datetime format
positive_sentiments['Month_Year'] = pd.to_datetime(positive_sentiments['Month_Year'])

plt.figure(figsize=(10, 6))

# Count the number of articles per month-year
filtered_df = positive_sentiments[positive_sentiments['Bert_topics'] == 1]
monthly_counts = filtered_df['Month_Year'].value_counts()

# Sort the index (datetime values) in chronological order
monthly_counts = monthly_counts.sort_index()

# Create the bar plot using Seaborn
ax = sns.barplot(x=monthly_counts.index, y=monthly_counts.values, color='green')

plt.xlabel('Month Year')
plt.ylabel('Number of Positive Articles')
plt.title('Monthly Distribution of Positive Articles Over Time')

# Reformat the x-axis labels to 'Jan 2020' format
ax.set_xticklabels(monthly_counts.index.strftime('%b %Y'), rotation=90)

plt.tight_layout()
plt.show()

Technology and Market Distribution¶

In [ ]:
positive_sentiments[positive_sentiments['Bert_topics'] == 2].sample(n = 3, random_state = 42)
Out[ ]:
date clean_lem_title clean_lem_text title_length text_length neg neu pos compound sentiment sentiments_SVM Sentiments_LR Sentiments_NB text_tokens text_tokens_string Bert_topics Bert_topics_words
39092 2022-07-27 data science platform market growth outlook data science platform market growth outlook about about ein presswire how we are different well how it work testimonial contact ein presswire news price distribution distribution overview medium database major news site tv radio station international newswires newswires industry newswires country newswires state mobile apps newsplugin live feed sample distribution report press release all feature by industry by country by state archive newswires international newswires newswires industry agr... 6 1288 0.009 0.863 0.128 0.9966 Positive 0 0 0 [data, science, platform, market, growth, outlook, about, about, ein, presswire, how, we, are, different, well, how, it, work, testimonial, contact, ein, presswire, news, price, distribution, distribution, overview, medium, database, major, news, site, tv, radio, station, international, newswires, newswires, industry, newswires, country, newswires, state, mobile, apps, newsplugin, live, feed, sample, distribution, report, press, release, all, feature, by, industry, by, country, by, state, ar... data science platform market growth outlook about about ein presswire how we are different well how it work testimonial contact ein presswire news price distribution distribution overview medium database major news site tv radio station international newswires newswires industry newswires country newswires state mobile apps newsplugin live feed sample distribution report press release all feature by industry by country by state archive newswires international newswires newswires industry agr... 2 [(newswires, 0.019804878500146027), (presswire, 0.014260630886724432), (ein, 0.014195970402431066), (dakota, 0.008438508409923861), (virginia, 0.0076258712670572515), (carolina, 0.007501503812976923), (guinea, 0.007292095718265533), (south, 0.007278378836501622), (island, 0.007021111778450116), (distribution, 0.006952483116191454)]
151346 2021-06-15 aerospace artificial intelligence market generate billion allied market research aerospace artificial intelligence market generate billion allied market research about about ein presswire how we are different well distribution overview how it work get start testimonial video tutorial editorial guideline faq contact price distribution price comparison chart distribution overview tv radio station ein microwires mobile apps press release all feature by industry by country by state archive newswires all by industry agriculture airline automotive banking book publish business... 9 1395 0.000 0.896 0.104 0.2960 Positive 0 0 0 [aerospace, artificial, intelligence, market, generate, billion, allied, market, research, about, about, ein, presswire, how, we, are, different, well, distribution, overview, how, it, work, get, start, testimonial, video, tutorial, editorial, guideline, faq, contact, price, distribution, price, comparison, chart, distribution, overview, tv, radio, station, ein, microwires, mobile, apps, press, release, all, feature, by, industry, by, country, by, state, archive, newswires, all, by, industry... aerospace artificial intelligence market generate billion allied market research about about ein presswire how we are different well distribution overview how it work get start testimonial video tutorial editorial guideline faq contact price distribution price comparison chart distribution overview tv radio station ein microwires mobile apps press release all feature by industry by country by state archive newswires all by industry agriculture airline automotive banking book publish business... 2 [(newswires, 0.019804878500146027), (presswire, 0.014260630886724432), (ein, 0.014195970402431066), (dakota, 0.008438508409923861), (virginia, 0.0076258712670572515), (carolina, 0.007501503812976923), (guinea, 0.007292095718265533), (south, 0.007278378836501622), (island, 0.007021111778450116), (distribution, 0.006952483116191454)]
34234 2020-09-15 lead 5g era tencents visual ai wimi hologram 5g stitch algorithm lead 5g era tencents visual ai wimi hologram 5g stitch algorithm about why how it work testimonial video tutorial editorial guideline faq contact price distribution price comparison chart distribution overview tv radio station ein microwires mobile apps press release all feature by industry by country by state archive newswires all by industry agriculture airline automotive banking book publish business casino chemical company conference trade show construction consumer cosmetic education el... 11 1199 0.012 0.762 0.226 0.9992 Positive 0 1 0 [lead, 5g, era, tencents, visual, ai, wimi, hologram, 5g, stitch, algorithm, about, why, how, it, work, testimonial, video, tutorial, editorial, guideline, faq, contact, price, distribution, price, comparison, chart, distribution, overview, tv, radio, station, ein, microwires, mobile, apps, press, release, all, feature, by, industry, by, country, by, state, archive, newswires, all, by, industry, agriculture, airline, automotive, banking, book, publish, business, casino, chemical, company, co... lead 5g era tencents visual ai wimi hologram 5g stitch algorithm about why how it work testimonial video tutorial editorial guideline faq contact price distribution price comparison chart distribution overview tv radio station ein microwires mobile apps press release all feature by industry by country by state archive newswires all by industry agriculture airline automotive banking book publish business casino chemical company conference trade show construction consumer cosmetic education el... 2 [(newswires, 0.019804878500146027), (presswire, 0.014260630886724432), (ein, 0.014195970402431066), (dakota, 0.008438508409923861), (virginia, 0.0076258712670572515), (carolina, 0.007501503812976923), (guinea, 0.007292095718265533), (south, 0.007278378836501622), (island, 0.007021111778450116), (distribution, 0.006952483116191454)]
In [ ]:
# Extract and format month and year
positive_sentiments['Month_Year'] = positive_sentiments['date'].dt.strftime('%b %Y')

# Convert 'Month_Year' column to datetime format
positive_sentiments['Month_Year'] = pd.to_datetime(positive_sentiments['Month_Year'])

plt.figure(figsize=(10, 6))

# Count the number of articles per month-year
filtered_df = positive_sentiments[positive_sentiments['Bert_topics'] == 2]
monthly_counts = filtered_df['Month_Year'].value_counts()

# Sort the index (datetime values) in chronological order
monthly_counts = monthly_counts.sort_index()

# Create the bar plot using Seaborn
ax = sns.barplot(x=monthly_counts.index, y=monthly_counts.values, color='green')

plt.xlabel('Month Year')
plt.ylabel('Number of Positive Articles')
plt.title('Monthly Distribution of Positive Articles Over Time')

# Reformat the x-axis labels to 'Jan 2020' format
ax.set_xticklabels(monthly_counts.index.strftime('%b %Y'), rotation=90)

plt.tight_layout()
plt.show()

AI for healthcare¶

In [ ]:
positive_sentiments[positive_sentiments['Bert_topics'] == 8].sample(n = 3, random_state = 42)
Out[ ]:
date clean_lem_title clean_lem_text title_length text_length neg neu pos compound sentiment sentiments_SVM Sentiments_LR Sentiments_NB text_tokens text_tokens_string Bert_topics Bert_topics_words
101568 2020-06-30 the world artificial intelligence conference open july visitor registration available now anp pers support de kortste weg naar publiciteit the world artificial intelligence conference open july visitor registration available now anp pers support de kortste weg naar publiciteit volg ons op aps nieuwsmanager login cision communication cloud login english home diensten verstuur direct een persbericht de medium bereiken nieuwswaardige content creren mediaaandacht monitoren en analyseren workshop event inspiratie gepubliceerde persberichten hoe schrijf je een effectief persbericht verhalen van klanten over ons over anp pers support ... 19 670 0.069 0.832 0.099 0.8939 Positive 1 1 0 [the, world, artificial, intelligence, conference, open, july, visitor, registration, available, now, anp, pers, support, de, kortste, weg, naar, publiciteit, volg, ons, op, aps, nieuwsmanager, login, cision, communication, cloud, login, english, home, diensten, verstuur, direct, een, persbericht, de, medium, bereiken, nieuwswaardige, content, creren, mediaaandacht, monitoren, en, analyseren, workshop, event, inspiratie, gepubliceerde, persberichten, hoe, schrijf, je, een, effectief, persber... the world artificial intelligence conference open july visitor registration available now anp pers support de kortste weg naar publiciteit volg ons op aps nieuwsmanager login cision communication cloud login english home diensten verstuur direct een persbericht de medium bereiken nieuwswaardige content creren mediaaandacht monitoren en analyseren workshop event inspiratie gepubliceerde persberichten hoe schrijf je een effectief persbericht verhalen van klanten over ons over anp pers support ... 8 [(coronavirus, 0.008864728307373521), (virus, 0.00875205147131821), (covid19, 0.00845048740377125), (outbreak, 0.00550175131506931), (vaccine, 0.004901187028885575), (pandemic, 0.004776594345156644), (disease, 0.004554059873686384), (researcher, 0.003666320459818079), (covid, 0.003595250121197314), (patient, 0.0035583598891204297)]
141154 2021-04-27 the innovative highspeed covid19 test platform combine mass spectrometry machine learn exbulletin the innovative highspeed covid19 test platform combine mass spectrometry machine learn exbulletin connect exbulletinthe innovative highspeed covid19 test platform combine mass spectrometry machine healththe innovative highspeed covid19 test platform combine mass spectrometry machine learn publish min agoon april 2021by newsdesk the target plate use mass spectrometer protein sample laser ionize measure analyze detect sarscov2 virus credit uc davis health maurice chairman ceo university califo... 12 1049 0.012 0.787 0.201 0.9993 Positive 0 0 0 [the, innovative, highspeed, covid19, test, platform, combine, mass, spectrometry, machine, learn, exbulletin, connect, exbulletinthe, innovative, highspeed, covid19, test, platform, combine, mass, spectrometry, machine, healththe, innovative, highspeed, covid19, test, platform, combine, mass, spectrometry, machine, learn, publish, min, agoon, april, 2021by, newsdesk, the, target, plate, use, mass, spectrometer, protein, sample, laser, ionize, measure, analyze, detect, sarscov2, virus, credi... the innovative highspeed covid19 test platform combine mass spectrometry machine learn exbulletin connect exbulletinthe innovative highspeed covid19 test platform combine mass spectrometry machine healththe innovative highspeed covid19 test platform combine mass spectrometry machine learn publish min agoon april 2021by newsdesk the target plate use mass spectrometer protein sample laser ionize measure analyze detect sarscov2 virus credit uc davis health maurice chairman ceo university califo... 8 [(coronavirus, 0.008864728307373521), (virus, 0.00875205147131821), (covid19, 0.00845048740377125), (outbreak, 0.00550175131506931), (vaccine, 0.004901187028885575), (pandemic, 0.004776594345156644), (disease, 0.004554059873686384), (researcher, 0.003666320459818079), (covid, 0.003595250121197314), (patient, 0.0035583598891204297)]
142295 2020-05-11 this ai tool predicts who get covid19 without test this ai tool predicts who get covid19 without test tnw news late hard fork plug read me growth quarter neural more tnw news event online event rebrand online event sprint online event checkout online event ecosystem online event transform tnw2020 enterprise intelligence answer space about tnw about advertise job contact news event online event rebrand online event sprint online event checkout online event ecosystem online event transform tnw2020 business index tnw amas space about advertise ... 9 424 0.011 0.777 0.211 0.9994 Positive 0 0 0 [this, ai, tool, predicts, who, get, covid19, without, test, tnw, news, late, hard, fork, plug, read, me, growth, quarter, neural, more, tnw, news, event, online, event, rebrand, online, event, sprint, online, event, checkout, online, event, ecosystem, online, event, transform, tnw2020, enterprise, intelligence, answer, space, about, tnw, about, advertise, job, contact, news, event, online, event, rebrand, online, event, sprint, online, event, checkout, online, event, ecosystem, online, even... this ai tool predicts who get covid19 without test tnw news late hard fork plug read me growth quarter neural more tnw news event online event rebrand online event sprint online event checkout online event ecosystem online event transform tnw2020 enterprise intelligence answer space about tnw about advertise job contact news event online event rebrand online event sprint online event checkout online event ecosystem online event transform tnw2020 business index tnw amas space about advertise ... 8 [(coronavirus, 0.008864728307373521), (virus, 0.00875205147131821), (covid19, 0.00845048740377125), (outbreak, 0.00550175131506931), (vaccine, 0.004901187028885575), (pandemic, 0.004776594345156644), (disease, 0.004554059873686384), (researcher, 0.003666320459818079), (covid, 0.003595250121197314), (patient, 0.0035583598891204297)]
In [ ]:
# Extract and format month and year
positive_sentiments['Month_Year'] = positive_sentiments['date'].dt.strftime('%b %Y')

# Convert 'Month_Year' column to datetime format
positive_sentiments['Month_Year'] = pd.to_datetime(positive_sentiments['Month_Year'])

plt.figure(figsize=(10, 6))

# Count the number of articles per month-year
filtered_df = positive_sentiments[positive_sentiments['Bert_topics'] == 8]
monthly_counts = filtered_df['Month_Year'].value_counts()

# Sort the index (datetime values) in chronological order
monthly_counts = monthly_counts.sort_index()

# Create the bar plot using Seaborn
ax = sns.barplot(x=monthly_counts.index, y=monthly_counts.values, color='green')

plt.xlabel('Month Year')
plt.ylabel('Number of Positive Articles')
plt.title('Monthly Distribution of Positive Articles Over Time')

# Reformat the x-axis labels to 'Jan 2020' format
ax.set_xticklabels(monthly_counts.index.strftime('%b %Y'), rotation=90)

plt.tight_layout()
plt.show()
In [ ]:
positive_sentiments[positive_sentiments['Bert_topics'] == 20].sample(n = 3, random_state = 42)
Out[ ]:
date clean_lem_title clean_lem_text title_length text_length neg neu pos compound sentiment sentiments_SVM Sentiments_LR Sentiments_NB text_tokens text_tokens_string Bert_topics Bert_topics_words Month_Year
13720 2021-07-13 artificial intelligence medicine artificial intelligence medicine skip content about dmca contact form privacy policy editorial policy sunday vision complete news world primary menu sunday vision home top news world economy science health technology sport entertainment contact form search intelligence medicine science artificial intelligence medicine min ago mia thompson if search database pubmed one important search engine scientific work field health international level find past five year nearly scientific article publis... 3 557 0.003 0.721 0.276 0.9997 Positive 0 0 0 [artificial, intelligence, medicine, skip, content, about, dmca, contact, form, privacy, policy, editorial, policy, sunday, vision, complete, news, world, primary, menu, sunday, vision, home, top, news, world, economy, science, health, technology, sport, entertainment, contact, form, search, intelligence, medicine, science, artificial, intelligence, medicine, min, ago, mia, thompson, if, search, database, pubmed, one, important, search, engine, scientific, work, field, health, international,... artificial intelligence medicine skip content about dmca contact form privacy policy editorial policy sunday vision complete news world primary menu sunday vision home top news world economy science health technology sport entertainment contact form search intelligence medicine science artificial intelligence medicine min ago mia thompson if search database pubmed one important search engine scientific work field health international level find past five year nearly scientific article publis... 20 [(healthcare, 0.012301374196889163), (patient, 0.01210438232233961), (care, 0.008687317878527858), (health, 0.007760882444755336), (hospital, 0.005156946585092279), (doctor, 0.004324018572158118), (clinical, 0.004304513704221675), (provider, 0.004248157645802022), (medical, 0.004084759870938637), (medicine, 0.004005351264932845)] 2021-07-01
173511 2023-05-16 will ai perpetuate eliminate health disparity will ai perpetuate eliminate health disparity skip main content home condition back condition view all addadhd allergy arthritis atrial fibrillation breast cancer cancer crohn disease depression diabetes dvt eczema eye health heart disease hiv aid lung disease lupus mental health multiple sclerosis migraine pain management psoriasis psoriatic arthritis rheumatoid arthritis sexual condition skin problem sleep disorder ulcerative colitis view all drug supplement back drug supplement drug suppl... 6 1123 0.040 0.841 0.119 0.9876 Positive 0 0 0 [will, ai, perpetuate, eliminate, health, disparity, skip, main, content, home, condition, back, condition, view, all, addadhd, allergy, arthritis, atrial, fibrillation, breast, cancer, cancer, crohn, disease, depression, diabetes, dvt, eczema, eye, health, heart, disease, hiv, aid, lung, disease, lupus, mental, health, multiple, sclerosis, migraine, pain, management, psoriasis, psoriatic, arthritis, rheumatoid, arthritis, sexual, condition, skin, problem, sleep, disorder, ulcerative, coliti... will ai perpetuate eliminate health disparity skip main content home condition back condition view all addadhd allergy arthritis atrial fibrillation breast cancer cancer crohn disease depression diabetes dvt eczema eye health heart disease hiv aid lung disease lupus mental health multiple sclerosis migraine pain management psoriasis psoriatic arthritis rheumatoid arthritis sexual condition skin problem sleep disorder ulcerative colitis view all drug supplement back drug supplement drug suppl... 20 [(healthcare, 0.012301374196889163), (patient, 0.01210438232233961), (care, 0.008687317878527858), (health, 0.007760882444755336), (hospital, 0.005156946585092279), (doctor, 0.004324018572158118), (clinical, 0.004304513704221675), (provider, 0.004248157645802022), (medical, 0.004084759870938637), (medicine, 0.004005351264932845)] 2023-05-01
21746 2021-08-09 buddi ai launch firstever endtoend revenue cycle automation suite power by proprietary contextual lake thestreet buddi ai launch firstever endtoend revenue cycle automation suite power by proprietary contextual lake thestreet log inreceive full access market insight commentary newsletter break news alert moreforgot passwordlog indont account sign up herejoin usreceive full access market insight commentary newsletter break news alert morei agree themavensterms andpolicysign upalready account login jim advisor buddi ai launch firstever endtoend revenue cycle automation suite power by proprietary contextu... 15 696 0.007 0.729 0.264 0.9995 Positive 0 0 0 [buddi, ai, launch, firstever, endtoend, revenue, cycle, automation, suite, power, by, proprietary, contextual, lake, thestreet, log, inreceive, full, access, market, insight, commentary, newsletter, break, news, alert, moreforgot, passwordlog, indont, account, sign, up, herejoin, usreceive, full, access, market, insight, commentary, newsletter, break, news, alert, morei, agree, themavensterms, andpolicysign, upalready, account, login, jim, advisor, buddi, ai, launch, firstever, endtoend, re... buddi ai launch firstever endtoend revenue cycle automation suite power by proprietary contextual lake thestreet log inreceive full access market insight commentary newsletter break news alert moreforgot passwordlog indont account sign up herejoin usreceive full access market insight commentary newsletter break news alert morei agree themavensterms andpolicysign upalready account login jim advisor buddi ai launch firstever endtoend revenue cycle automation suite power by proprietary contextu... 20 [(healthcare, 0.012301374196889163), (patient, 0.01210438232233961), (care, 0.008687317878527858), (health, 0.007760882444755336), (hospital, 0.005156946585092279), (doctor, 0.004324018572158118), (clinical, 0.004304513704221675), (provider, 0.004248157645802022), (medical, 0.004084759870938637), (medicine, 0.004005351264932845)] 2021-08-01
In [ ]:
# Extract and format month and year
positive_sentiments['Month_Year'] = positive_sentiments['date'].dt.strftime('%b %Y')

# Convert 'Month_Year' column to datetime format
positive_sentiments['Month_Year'] = pd.to_datetime(positive_sentiments['Month_Year'])

plt.figure(figsize=(10, 6))

# Count the number of articles per month-year
filtered_df = positive_sentiments[positive_sentiments['Bert_topics'] == 20]
monthly_counts = filtered_df['Month_Year'].value_counts()

# Sort the index (datetime values) in chronological order
monthly_counts = monthly_counts.sort_index()

# Create the bar plot using Seaborn
ax = sns.barplot(x=monthly_counts.index, y=monthly_counts.values, color='green')

plt.xlabel('Month Year')
plt.ylabel('Number of Positive Articles')
plt.title('Monthly Distribution of Positive Articles Over Time')

# Reformat the x-axis labels to 'Jan 2020' format
ax.set_xticklabels(monthly_counts.index.strftime('%b %Y'), rotation=90)

plt.tight_layout()
plt.show()
In [ ]:
# Extract and format month and year
positive_sentiments['Month_Year'] = positive_sentiments['date'].dt.strftime('%b %Y')

# Convert 'Month_Year' column to datetime format
positive_sentiments['Month_Year'] = pd.to_datetime(positive_sentiments['Month_Year'])

plt.figure(figsize=(10, 6))

# Count the number of articles per month-year
filtered_df = positive_sentiments[positive_sentiments['Bert_topics'] == 20]
monthly_counts = filtered_df['Month_Year'].value_counts()

# Sort the index (datetime values) in chronological order
monthly_counts = monthly_counts.sort_index()

# Create the bar plot using Seaborn
ax = sns.barplot(x=monthly_counts.index, y=monthly_counts.values, color='green')

plt.xlabel('Month Year')
plt.ylabel('Number of Positive Articles')
plt.title('Monthly Distribution of Positive Articles Over Time')

# Reformat the x-axis labels to 'Jan 2020' format
ax.set_xticklabels(monthly_counts.index.strftime('%b %Y'), rotation=90)

plt.tight_layout()
plt.show()

conversational AI¶

In [ ]:
positive_sentiments[positive_sentiments['Bert_topics'] == 11].sample(n = 3, random_state = 42)
Out[ ]:
date clean_lem_title clean_lem_text title_length text_length neg neu pos compound sentiment sentiments_SVM Sentiments_LR Sentiments_NB text_tokens text_tokens_string Bert_topics Bert_topics_words
148721 2023-02-10 google take first shot ai search engine war announces bard competitor chatgpt newsbreak google take first shot ai search engine war announces bard competitor chatgpt newsbreaksign arttv seriesbooks dancebehind viral videosperforming artstv musichip healthhealth servicesmental healthdiseases healthcancerfood sportspremier drinkspetsbeauty safetypublic safetyaccidentslaw enforcementtraffic advicefamily rentlabor issuestrouble scienceearth nationsmiddle location channel topic inwindows central central nextgeneration destination news advice buying recommendation window ecosystem pr... 13 1176 0.059 0.729 0.212 0.9991 Positive 1 1 0 [google, take, first, shot, ai, search, engine, war, announces, bard, competitor, chatgpt, newsbreaksign, arttv, seriesbooks, dancebehind, viral, videosperforming, artstv, musichip, healthhealth, servicesmental, healthdiseases, healthcancerfood, sportspremier, drinkspetsbeauty, safetypublic, safetyaccidentslaw, enforcementtraffic, advicefamily, rentlabor, issuestrouble, scienceearth, nationsmiddle, location, channel, topic, inwindows, central, central, nextgeneration, destination, news, advi... google take first shot ai search engine war announces bard competitor chatgpt newsbreaksign arttv seriesbooks dancebehind viral videosperforming artstv musichip healthhealth servicesmental healthdiseases healthcancerfood sportspremier drinkspetsbeauty safetypublic safetyaccidentslaw enforcementtraffic advicefamily rentlabor issuestrouble scienceearth nationsmiddle location channel topic inwindows central central nextgeneration destination news advice buying recommendation window ecosystem pr... 11 [(bard, 0.03231039532703996), (google, 0.014137957225052705), (chatbot, 0.006627379491619021), (pichai, 0.006010275366832634), (chatgpt, 0.004897290695999714), (answer, 0.004149653517238737), (response, 0.00411945867071401), (lamda, 0.0040170592490545625), (sundar, 0.003953516840000442), (quote, 0.0038950542711030703)]
37106 2023-03-22 chatgpt rival google launch bard chatbot uk chatgpt rival google launch bard chatbot uk technology science lifestyle technology science cheryl kahla audience content strategist minute read mar pm chatgpt rival google launch bard chatbot uk cheryl kahla google launch ai chatbot bard uk compete microsoftbacked chatgpt photo istock edit openai google logo google launch new artificial intelligence ai chatbot rival chatgpt bard first announce february incorporate google search due course when bard first announce google alphabet ceo sundar ... 7 335 0.000 0.723 0.277 0.9928 Positive 0 0 0 [chatgpt, rival, google, launch, bard, chatbot, uk, technology, science, lifestyle, technology, science, cheryl, kahla, audience, content, strategist, minute, read, mar, pm, chatgpt, rival, google, launch, bard, chatbot, uk, cheryl, kahla, google, launch, ai, chatbot, bard, uk, compete, microsoftbacked, chatgpt, photo, istock, edit, openai, google, logo, google, launch, new, artificial, intelligence, ai, chatbot, rival, chatgpt, bard, first, announce, february, incorporate, google, search, d... chatgpt rival google launch bard chatbot uk technology science lifestyle technology science cheryl kahla audience content strategist minute read mar pm chatgpt rival google launch bard chatbot uk cheryl kahla google launch ai chatbot bard uk compete microsoftbacked chatgpt photo istock edit openai google logo google launch new artificial intelligence ai chatbot rival chatgpt bard first announce february incorporate google search due course when bard first announce google alphabet ceo sundar ... 11 [(bard, 0.03231039532703996), (google, 0.014137957225052705), (chatbot, 0.006627379491619021), (pichai, 0.006010275366832634), (chatgpt, 0.004897290695999714), (answer, 0.004149653517238737), (response, 0.00411945867071401), (lamda, 0.0040170592490545625), (sundar, 0.003953516840000442), (quote, 0.0038950542711030703)]
16370 2023-02-10 google bard advert show new ai search tool make factual error business technology emirates247 google bard advert show new ai search tool make factual error business technology emirates247 sign web notification webpush close pm friday february search search advanced navigation social twitter facebook instagram linkedin youtube home news emirate government region world law order local region world lifestyle health shop food fashion entertainment film music celebrity gossip event bollywood buzz take one business economy finance corporate technology energy gold currency property real est... 14 493 0.000 0.808 0.192 0.9973 Positive 0 0 0 [google, bard, advert, show, new, ai, search, tool, make, factual, error, business, technology, emirates247, sign, web, notification, webpush, close, pm, friday, february, search, search, advanced, navigation, social, twitter, facebook, instagram, linkedin, youtube, home, news, emirate, government, region, world, law, order, local, region, world, lifestyle, health, shop, food, fashion, entertainment, film, music, celebrity, gossip, event, bollywood, buzz, take, one, business, economy, financ... google bard advert show new ai search tool make factual error business technology emirates247 sign web notification webpush close pm friday february search search advanced navigation social twitter facebook instagram linkedin youtube home news emirate government region world law order local region world lifestyle health shop food fashion entertainment film music celebrity gossip event bollywood buzz take one business economy finance corporate technology energy gold currency property real est... 11 [(bard, 0.03231039532703996), (google, 0.014137957225052705), (chatbot, 0.006627379491619021), (pichai, 0.006010275366832634), (chatgpt, 0.004897290695999714), (answer, 0.004149653517238737), (response, 0.00411945867071401), (lamda, 0.0040170592490545625), (sundar, 0.003953516840000442), (quote, 0.0038950542711030703)]
In [ ]:
# Extract and format month and year
positive_sentiments['Month_Year'] = positive_sentiments['date'].dt.strftime('%b %Y')

# Convert 'Month_Year' column to datetime format
positive_sentiments['Month_Year'] = pd.to_datetime(positive_sentiments['Month_Year'])

plt.figure(figsize=(10, 6))

# Count the number of articles per month-year
filtered_df = positive_sentiments[positive_sentiments['Bert_topics'] == 11]
monthly_counts = filtered_df['Month_Year'].value_counts()

# Sort the index (datetime values) in chronological order
monthly_counts = monthly_counts.sort_index()

# Create the bar plot using Seaborn
ax = sns.barplot(x=monthly_counts.index, y=monthly_counts.values, color='green')

plt.xlabel('Month Year')
plt.ylabel('Number of Positive Articles')
plt.title('Monthly Distribution of Positive Articles Over Time')

# Reformat the x-axis labels to 'Jan 2020' format
ax.set_xticklabels(monthly_counts.index.strftime('%b %Y'), rotation=90)

plt.tight_layout()
plt.show()

Artificial Intelligence and Advanced Computing¶

In [ ]:
positive_sentiments[positive_sentiments['Bert_topics'] == 12].sample(n = 3, random_state = 42)
Out[ ]:
date clean_lem_title clean_lem_text title_length text_length neg neu pos compound sentiment sentiments_SVM Sentiments_LR Sentiments_NB text_tokens text_tokens_string Bert_topics Bert_topics_words
163067 2023-06-27 nvidia brings ai compute platform cloud data firm snowflake et telecom nvidia brings ai compute platform cloud data firm snowflake et telecom get app home news exclusive event award more login go leader speak webinars satcom industry policy telecom equipment enterprise service device mobile apps teletalk job career telestats smartphone masterclasses newsletter about contact digital telco summit with 5g rollout telco look build agile resilient network india satcom symposium wireless connectivity set redefine space broadband tech series platform technology leader... 11 647 0.012 0.815 0.173 0.9994 Positive 0 0 0 [nvidia, brings, ai, compute, platform, cloud, data, firm, snowflake, et, telecom, get, app, home, news, exclusive, event, award, more, login, go, leader, speak, webinars, satcom, industry, policy, telecom, equipment, enterprise, service, device, mobile, apps, teletalk, job, career, telestats, smartphone, masterclasses, newsletter, about, contact, digital, telco, summit, with, 5g, rollout, telco, look, build, agile, resilient, network, india, satcom, symposium, wireless, connectivity, set, r... nvidia brings ai compute platform cloud data firm snowflake et telecom get app home news exclusive event award more login go leader speak webinars satcom industry policy telecom equipment enterprise service device mobile apps teletalk job career telestats smartphone masterclasses newsletter about contact digital telco summit with 5g rollout telco look build agile resilient network india satcom symposium wireless connectivity set redefine space broadband tech series platform technology leader... 12 [(nvidia, 0.02798544457352133), (nvidias, 0.011120200769294437), (dgx, 0.007288895444805347), (gpus, 0.006739286881278737), (huang, 0.006736358225432658), (gpu, 0.005900713973334954), (a100, 0.005746710524273908), (chip, 0.005710524122713665), (supercomputer, 0.005643543421899692), (compute, 0.004893045064437412)]
185606 2021-02-11 nvidia researcher train ai reward dog respond command venturebeat nvidia researcher train ai reward dog respond command venturebeat venturebeat homepage the machine gamesbeat job special issue account setting log out become member sign in venturebeat homepage the machine make sense ai venturebeat arvr big data cloud commerce dev enterprise entrepreneur marketing medium mobile security social transportation follow follow twitter follow facebook follow linkedin follow the machine ai machine learn computer vision natural language process robotic process autom... 9 656 0.021 0.880 0.098 0.9964 Positive 0 0 0 [nvidia, researcher, train, ai, reward, dog, respond, command, venturebeat, venturebeat, homepage, the, machine, gamesbeat, job, special, issue, account, setting, log, out, become, member, sign, in, venturebeat, homepage, the, machine, make, sense, ai, venturebeat, arvr, big, data, cloud, commerce, dev, enterprise, entrepreneur, marketing, medium, mobile, security, social, transportation, follow, follow, twitter, follow, facebook, follow, linkedin, follow, the, machine, ai, machine, learn, c... nvidia researcher train ai reward dog respond command venturebeat venturebeat homepage the machine gamesbeat job special issue account setting log out become member sign in venturebeat homepage the machine make sense ai venturebeat arvr big data cloud commerce dev enterprise entrepreneur marketing medium mobile security social transportation follow follow twitter follow facebook follow linkedin follow the machine ai machine learn computer vision natural language process robotic process autom... 12 [(nvidia, 0.02798544457352133), (nvidias, 0.011120200769294437), (dgx, 0.007288895444805347), (gpus, 0.006739286881278737), (huang, 0.006736358225432658), (gpu, 0.005900713973334954), (a100, 0.005746710524273908), (chip, 0.005710524122713665), (supercomputer, 0.005643543421899692), (compute, 0.004893045064437412)]
11838 2021-12-15 mt ai intema join nvidia inception vc alliance to invest in ai startup thestreet mt ai intema join nvidia inception vc alliance to invest in ai startup thestreet skip main contentdecember 2021log inreceive full access market insight commentary newsletter break news alert moreforgot passwordlog indont account sign up herejoin usreceive full access market insight commentary newsletter break news alert morei agree themavensterms andpolicysign upalready account login investingpersonal advisor publish datedec am estmts ai intema join nvidia inception vc alliance to invest in ... 14 645 0.009 0.822 0.169 0.9975 Positive 1 1 0 [mt, ai, intema, join, nvidia, inception, vc, alliance, to, invest, in, ai, startup, thestreet, skip, main, contentdecember, 2021log, inreceive, full, access, market, insight, commentary, newsletter, break, news, alert, moreforgot, passwordlog, indont, account, sign, up, herejoin, usreceive, full, access, market, insight, commentary, newsletter, break, news, alert, morei, agree, themavensterms, andpolicysign, upalready, account, login, investingpersonal, advisor, publish, datedec, am, estmts... mt ai intema join nvidia inception vc alliance to invest in ai startup thestreet skip main contentdecember 2021log inreceive full access market insight commentary newsletter break news alert moreforgot passwordlog indont account sign up herejoin usreceive full access market insight commentary newsletter break news alert morei agree themavensterms andpolicysign upalready account login investingpersonal advisor publish datedec am estmts ai intema join nvidia inception vc alliance to invest in ... 12 [(nvidia, 0.02798544457352133), (nvidias, 0.011120200769294437), (dgx, 0.007288895444805347), (gpus, 0.006739286881278737), (huang, 0.006736358225432658), (gpu, 0.005900713973334954), (a100, 0.005746710524273908), (chip, 0.005710524122713665), (supercomputer, 0.005643543421899692), (compute, 0.004893045064437412)]
In [ ]:
# Extract and format month and year
positive_sentiments['Month_Year'] = positive_sentiments['date'].dt.strftime('%b %Y')

# Convert 'Month_Year' column to datetime format
positive_sentiments['Month_Year'] = pd.to_datetime(positive_sentiments['Month_Year'])

plt.figure(figsize=(10, 6))

# Count the number of articles per month-year
filtered_df = positive_sentiments[positive_sentiments['Bert_topics'] == 12]
monthly_counts = filtered_df['Month_Year'].value_counts()

# Sort the index (datetime values) in chronological order
monthly_counts = monthly_counts.sort_index()

# Create the bar plot using Seaborn
ax = sns.barplot(x=monthly_counts.index, y=monthly_counts.values, color='green')

plt.xlabel('Month Year')
plt.ylabel('Number of Positive Articles')
plt.title('Monthly Distribution of Positive Articles Over Time')

# Reformat the x-axis labels to 'Jan 2020' format
ax.set_xticklabels(monthly_counts.index.strftime('%b %Y'), rotation=90)

plt.tight_layout()
plt.show()

AI in Finance¶

In [ ]:
positive_sentiments[positive_sentiments['Bert_topics'] == 16].sample(n = 3, random_state = 42)
Out[ ]:
date clean_lem_title clean_lem_text title_length text_length neg neu pos compound sentiment sentiments_SVM Sentiments_LR Sentiments_NB text_tokens text_tokens_string Bert_topics Bert_topics_words Month_Year
93371 2021-08-17 discovery enhances customer experience use machine learn discovery enhances customer experience use machine billionairesworlds billionaire forbes america rich selfmade woman china rich india rich indonesia rich korea rich thailand rich japan rich australia rich taiwan rich singapore rich philippine rich hong kongs rich malaysia rich money politics money innovationall innovation5g ai big data cloud cloud cognizant brandvoice pay program consumer tech cybersecurity enterprise tech future of work game healthcare innovation rule jumio brandvoice pay p... 7 887 0.075 0.816 0.110 0.9656 Positive 0 0 0 [discovery, enhances, customer, experience, use, machine, billionairesworlds, billionaire, forbes, america, rich, selfmade, woman, china, rich, india, rich, indonesia, rich, korea, rich, thailand, rich, japan, rich, australia, rich, taiwan, rich, singapore, rich, philippine, rich, hong, kongs, rich, malaysia, rich, money, politics, money, innovationall, innovation5g, ai, big, data, cloud, cloud, cognizant, brandvoice, pay, program, consumer, tech, cybersecurity, enterprise, tech, future, of,... discovery enhances customer experience use machine billionairesworlds billionaire forbes america rich selfmade woman china rich india rich indonesia rich korea rich thailand rich japan rich australia rich taiwan rich singapore rich philippine rich hong kongs rich malaysia rich money politics money innovationall innovation5g ai big data cloud cloud cognizant brandvoice pay program consumer tech cybersecurity enterprise tech future of work game healthcare innovation rule jumio brandvoice pay p... 16 [(brandvoice, 0.04093580853640632), (pay, 0.03178831024464323), (forbes, 0.02708200721512143), (rich, 0.024351227946676653), (program, 0.017259111846690118), (best, 0.01212846276725953), (card, 0.011051961933657406), (credit, 0.008683369505121089), (insurance, 0.007090665785208376), (loan, 0.006380518768347531)] 2021-08-01
164305 2020-08-14 more way for freelancer to prosper ai freelancer at omdenacom team up to solve tough social and economic challenge more way for freelancer to prosper ai freelancer at omdenacom team up to solve tough social and economic billionairesworlds billionairesforbes 400americas rich selfmade womenchinas richestindias richestindonesias richestkoreas richestthailands richestjapans richestaustralias richesttaiwans richestsingapores richestphilippines richesthong kongs richestmalaysias richestmoney politics moneyinnovationall innovation5gaibig datacloudcloud 100consumer techgamesgreen techhealthcarejapan brandvoice p... 19 1437 0.010 0.840 0.150 0.9995 Positive 1 1 0 [more, way, for, freelancer, to, prosper, ai, freelancer, at, omdenacom, team, up, to, solve, tough, social, and, economic, billionairesworlds, billionairesforbes, 400americas, rich, selfmade, womenchinas, richestindias, richestindonesias, richestkoreas, richestthailands, richestjapans, richestaustralias, richesttaiwans, richestsingapores, richestphilippines, richesthong, kongs, richestmalaysias, richestmoney, politics, moneyinnovationall, innovation5gaibig, datacloudcloud, 100consumer, tech... more way for freelancer to prosper ai freelancer at omdenacom team up to solve tough social and economic billionairesworlds billionairesforbes 400americas rich selfmade womenchinas richestindias richestindonesias richestkoreas richestthailands richestjapans richestaustralias richesttaiwans richestsingapores richestphilippines richesthong kongs richestmalaysias richestmoney politics moneyinnovationall innovation5gaibig datacloudcloud 100consumer techgamesgreen techhealthcarejapan brandvoice p... 16 [(brandvoice, 0.04093580853640632), (pay, 0.03178831024464323), (forbes, 0.02708200721512143), (rich, 0.024351227946676653), (program, 0.017259111846690118), (best, 0.01212846276725953), (card, 0.011051961933657406), (credit, 0.008683369505121089), (insurance, 0.007090665785208376), (loan, 0.006380518768347531)] 2020-08-01
147529 2020-02-26 struggle with fake ai here how to become real ai company struggle with fake ai here how to become real ai company billionaire all billionaire world billionaire forbes america rich selfmade woman china rich india rich indonesia rich korea rich thailand rich japan rich australia rich taiwan rich singapore rich philippine rich hong kongs rich malaysia rich money politics money innovation all innovation 5g ai anaplan brandvoice pay program big data cio network cloud cloud consumer tech cybersecurity enterprise tech everbridge brandvoice pay program ex... 11 1680 0.071 0.780 0.149 0.9953 Positive 0 0 0 [struggle, with, fake, ai, here, how, to, become, real, ai, company, billionaire, all, billionaire, world, billionaire, forbes, america, rich, selfmade, woman, china, rich, india, rich, indonesia, rich, korea, rich, thailand, rich, japan, rich, australia, rich, taiwan, rich, singapore, rich, philippine, rich, hong, kongs, rich, malaysia, rich, money, politics, money, innovation, all, innovation, 5g, ai, anaplan, brandvoice, pay, program, big, data, cio, network, cloud, cloud, consumer, tech,... struggle with fake ai here how to become real ai company billionaire all billionaire world billionaire forbes america rich selfmade woman china rich india rich indonesia rich korea rich thailand rich japan rich australia rich taiwan rich singapore rich philippine rich hong kongs rich malaysia rich money politics money innovation all innovation 5g ai anaplan brandvoice pay program big data cio network cloud cloud consumer tech cybersecurity enterprise tech everbridge brandvoice pay program ex... 16 [(brandvoice, 0.04093580853640632), (pay, 0.03178831024464323), (forbes, 0.02708200721512143), (rich, 0.024351227946676653), (program, 0.017259111846690118), (best, 0.01212846276725953), (card, 0.011051961933657406), (credit, 0.008683369505121089), (insurance, 0.007090665785208376), (loan, 0.006380518768347531)] 2020-02-01
In [ ]:
# Extract and format month and year
positive_sentiments['Month_Year'] = positive_sentiments['date'].dt.strftime('%b %Y')

# Convert 'Month_Year' column to datetime format
positive_sentiments['Month_Year'] = pd.to_datetime(positive_sentiments['Month_Year'])

plt.figure(figsize=(10, 6))

# Count the number of articles per month-year
filtered_df = positive_sentiments[positive_sentiments['Bert_topics'] == 16]
monthly_counts = filtered_df['Month_Year'].value_counts()

# Sort the index (datetime values) in chronological order
monthly_counts = monthly_counts.sort_index()

# Create the bar plot using Seaborn
ax = sns.barplot(x=monthly_counts.index, y=monthly_counts.values, color='green')

plt.xlabel('Month Year')
plt.ylabel('Number of Positive Articles')
plt.title('Monthly Distribution of Positive Articles Over Time')

# Reformat the x-axis labels to 'Jan 2020' format
ax.set_xticklabels(monthly_counts.index.strftime('%b %Y'), rotation=90)

plt.tight_layout()
plt.show()

AI in Banking and Financial Fraud Detection¶

In [ ]:
positive_sentiments[positive_sentiments['Bert_topics'] == 21].sample(n = 3, random_state = 42)
Out[ ]:
date clean_lem_title clean_lem_text title_length text_length neg neu pos compound sentiment sentiments_SVM Sentiments_LR Sentiments_NB text_tokens text_tokens_string Bert_topics Bert_topics_words Month_Year
180131 2021-08-16 third finance firm accelerate use artificial intelligence detect money laundering newsbreak third finance firm accelerate use artificial intelligence detect money laundering newsbreaksearch location channel topic insign channelsadd useprivacy policydo not sell my infohelp centerabout particle mediain this articleuningmoney launderingfinancial national crime bankbritishyou may also useprivacy policydo not sell my infohelp centerabout particle mediaview insoftwarethird finance firm accelerate use artificial intelligence detect money launderingby karl flinderscomputer weekly day agofi... 11 881 0.030 0.901 0.069 0.9719 Positive 0 0 0 [third, finance, firm, accelerate, use, artificial, intelligence, detect, money, laundering, newsbreaksearch, location, channel, topic, insign, channelsadd, useprivacy, policydo, not, sell, my, infohelp, centerabout, particle, mediain, this, articleuningmoney, launderingfinancial, national, crime, bankbritishyou, may, also, useprivacy, policydo, not, sell, my, infohelp, centerabout, particle, mediaview, insoftwarethird, finance, firm, accelerate, use, artificial, intelligence, detect, money,... third finance firm accelerate use artificial intelligence detect money laundering newsbreaksearch location channel topic insign channelsadd useprivacy policydo not sell my infohelp centerabout particle mediain this articleuningmoney launderingfinancial national crime bankbritishyou may also useprivacy policydo not sell my infohelp centerabout particle mediaview insoftwarethird finance firm accelerate use artificial intelligence detect money launderingby karl flinderscomputer weekly day agofi... 21 [(bank, 0.014993848474900541), (banking, 0.014613705505395192), (fraud, 0.010801782319428358), (cookie, 0.009860001104120181), (payment, 0.009305872737604613), (transaction, 0.0066249502417588995), (financial, 0.006482861730195843), (fintech, 0.005037135411788505), (resistant, 0.0048764344513001255), (complyadvantage, 0.004613514003587014)] 2021-08-01
173367 2020-04-24 put ai to work credit risk and payment pymntscom put ai to work credit risk and payment pymntscom today unionsconnected carscrossborder idenitygig economyhealthcare paymentsinternet thingsopen bankingrealtime paymentsrestaurant paymentssubscribe section today row oneartificial twocredit unionsconnected carscrossborder paymentsrow identitygig economyrow fourhealthcare paymentsinternet thingsopen bankingrow fiverealtime paymentsrestaurant technologyretailrow sixtravel payment artificial intelligencewhy ai early adopter are laserfocused on cr... 9 712 0.024 0.835 0.141 0.9995 Positive 0 0 0 [put, ai, to, work, credit, risk, and, payment, pymntscom, today, unionsconnected, carscrossborder, idenitygig, economyhealthcare, paymentsinternet, thingsopen, bankingrealtime, paymentsrestaurant, paymentssubscribe, section, today, row, oneartificial, twocredit, unionsconnected, carscrossborder, paymentsrow, identitygig, economyrow, fourhealthcare, paymentsinternet, thingsopen, bankingrow, fiverealtime, paymentsrestaurant, technologyretailrow, sixtravel, payment, artificial, intelligencewhy... put ai to work credit risk and payment pymntscom today unionsconnected carscrossborder idenitygig economyhealthcare paymentsinternet thingsopen bankingrealtime paymentsrestaurant paymentssubscribe section today row oneartificial twocredit unionsconnected carscrossborder paymentsrow identitygig economyrow fourhealthcare paymentsinternet thingsopen bankingrow fiverealtime paymentsrestaurant technologyretailrow sixtravel payment artificial intelligencewhy ai early adopter are laserfocused on cr... 21 [(bank, 0.014993848474900541), (banking, 0.014613705505395192), (fraud, 0.010801782319428358), (cookie, 0.009860001104120181), (payment, 0.009305872737604613), (transaction, 0.0066249502417588995), (financial, 0.006482861730195843), (fintech, 0.005037135411788505), (resistant, 0.0048764344513001255), (complyadvantage, 0.004613514003587014)] 2020-04-01
78702 2022-01-19 data vault holding expands expertise in artificial intelligence machine learn big data appoints tony evans c3 ai to advisory board data vault holding expands expertise in artificial intelligence machine learn big data appoints tony evans c3 ai to advisory board skip content5050 restaurant guidethe pledge photosdownload appswatch heroesvail silencejeff davis 8coronavirusvaccine centerfishing forecastriver stagessportssports person weektwo dayshigh health dividecommunitygas price trackerheart scheduleabout uslatest country music tvfull court press greta van susterengray dccpress releasesdata vault holding expands expertis... 20 528 0.073 0.786 0.141 0.9883 Positive 0 0 0 [data, vault, holding, expands, expertise, in, artificial, intelligence, machine, learn, big, data, appoints, tony, evans, c3, ai, to, advisory, board, skip, content5050, restaurant, guidethe, pledge, photosdownload, appswatch, heroesvail, silencejeff, davis, 8coronavirusvaccine, centerfishing, forecastriver, stagessportssports, person, weektwo, dayshigh, health, dividecommunitygas, price, trackerheart, scheduleabout, uslatest, country, music, tvfull, court, press, greta, van, susterengray, ... data vault holding expands expertise in artificial intelligence machine learn big data appoints tony evans c3 ai to advisory board skip content5050 restaurant guidethe pledge photosdownload appswatch heroesvail silencejeff davis 8coronavirusvaccine centerfishing forecastriver stagessportssports person weektwo dayshigh health dividecommunitygas price trackerheart scheduleabout uslatest country music tvfull court press greta van susterengray dccpress releasesdata vault holding expands expertis... 21 [(bank, 0.014993848474900541), (banking, 0.014613705505395192), (fraud, 0.010801782319428358), (cookie, 0.009860001104120181), (payment, 0.009305872737604613), (transaction, 0.0066249502417588995), (financial, 0.006482861730195843), (fintech, 0.005037135411788505), (resistant, 0.0048764344513001255), (complyadvantage, 0.004613514003587014)] 2022-01-01
In [ ]:
# Extract and format month and year
positive_sentiments['Month_Year'] = positive_sentiments['date'].dt.strftime('%b %Y')

# Convert 'Month_Year' column to datetime format
positive_sentiments['Month_Year'] = pd.to_datetime(positive_sentiments['Month_Year'])

plt.figure(figsize=(10, 6))

# Count the number of articles per month-year
filtered_df = positive_sentiments[positive_sentiments['Bert_topics'] == 21]
monthly_counts = filtered_df['Month_Year'].value_counts()

# Sort the index (datetime values) in chronological order
monthly_counts = monthly_counts.sort_index()

# Create the bar plot using Seaborn
ax = sns.barplot(x=monthly_counts.index, y=monthly_counts.values, color='green')

plt.xlabel('Month Year')
plt.ylabel('Number of Positive Articles')
plt.title('Monthly Distribution of Positive Articles Over Time')

# Reformat the x-axis labels to 'Jan 2020' format
ax.set_xticklabels(monthly_counts.index.strftime('%b %Y'), rotation=90)

plt.tight_layout()
plt.show()

artificial intelligence in making investment decisions.¶

In [ ]:
positive_sentiments[positive_sentiments['Bert_topics'] == 28].sample(n = 3)
Out[ ]:
date clean_lem_title clean_lem_text title_length text_length neg neu pos compound sentiment sentiments_SVM Sentiments_LR Sentiments_NB text_tokens text_tokens_string Bert_topics Bert_topics_words Month_Year
39427 2023-02-08 retail investor flock smallcap ai firm big tech battle share reuters retail investor flock smallcap ai firm big tech battle share reuters skip main contentexclusive news data analytics financial market viewregisterus markets3 minute readfebruary pm utclast update agoretail investor flock smallcap ai firm big tech battle shareby medha singhchatgpt logo see illustration take february reutersdado reuters retail investor pile smallcap firm building artificial intelligence tool company include googleparent alphabet microsoft jostle pull ahead race next big growth ... 11 779 0.023 0.773 0.205 0.9958 Positive 0 0 0 [retail, investor, flock, smallcap, ai, firm, big, tech, battle, share, reuters, skip, main, contentexclusive, news, data, analytics, financial, market, viewregisterus, markets3, minute, readfebruary, pm, utclast, update, agoretail, investor, flock, smallcap, ai, firm, big, tech, battle, shareby, medha, singhchatgpt, logo, see, illustration, take, february, reutersdado, reuters, retail, investor, pile, smallcap, firm, building, artificial, intelligence, tool, company, include, googleparent, ... retail investor flock smallcap ai firm big tech battle share reuters skip main contentexclusive news data analytics financial market viewregisterus markets3 minute readfebruary pm utclast update agoretail investor flock smallcap ai firm big tech battle shareby medha singhchatgpt logo see illustration take february reutersdado reuters retail investor pile smallcap firm building artificial intelligence tool company include googleparent alphabet microsoft jostle pull ahead race next big growth ... 28 [(stock, 0.023904788595814668), (investorplace, 0.01544796477269306), (buy, 0.008307764777238285), (investor, 0.007376686291163532), (bearish, 0.006724162222856815), (bullish, 0.00636508904505768), (analyst, 0.00523340581064324), (trading, 0.005053029803090463), (sp, 0.00495622236670173), (return, 0.004890595467201021)] 2023-02-01
159981 2023-06-26 ai stock that could get amazonamd deal boost investorplace ai stock that could get amazonamd deal boost investorplace skip content dow nasdaq sp primary menu our analyst free report newsletter free report log my account my service support log today market stock stock pick hot stock stock buy stock sell stock quote all stock pick stock type bluechip stock dividend stock growth stock meme stock penny stock undervalue stock industry consumer discretionary consumer staple energy healthcare technology more industry crypto market analysis retirement about... 9 1071 0.015 0.792 0.193 0.9998 Positive 0 0 0 [ai, stock, that, could, get, amazonamd, deal, boost, investorplace, skip, content, dow, nasdaq, sp, primary, menu, our, analyst, free, report, newsletter, free, report, log, my, account, my, service, support, log, today, market, stock, stock, pick, hot, stock, stock, buy, stock, sell, stock, quote, all, stock, pick, stock, type, bluechip, stock, dividend, stock, growth, stock, meme, stock, penny, stock, undervalue, stock, industry, consumer, discretionary, consumer, staple, energy, healthca... ai stock that could get amazonamd deal boost investorplace skip content dow nasdaq sp primary menu our analyst free report newsletter free report log my account my service support log today market stock stock pick hot stock stock buy stock sell stock quote all stock pick stock type bluechip stock dividend stock growth stock meme stock penny stock undervalue stock industry consumer discretionary consumer staple energy healthcare technology more industry crypto market analysis retirement about... 28 [(stock, 0.023904788595814668), (investorplace, 0.01544796477269306), (buy, 0.008307764777238285), (investor, 0.007376686291163532), (bearish, 0.006724162222856815), (bullish, 0.00636508904505768), (analyst, 0.00523340581064324), (trading, 0.005053029803090463), (sp, 0.00495622236670173), (return, 0.004890595467201021)] 2023-06-01
28060 2023-01-17 ai stock that will make you amazingly rich year investorplace ai stock that will make you amazingly rich year investorplace skip content dow nasdaq sp primary menu premium service our analyst newsletter log my account my service support log today market stock stock pick hot stock stock buy stock sell stock quote all stock pick stock type bluechip stock dividend stock growth stock meme stock penny stock undervalue stock industry consumer discretionary consumer staple energy healthcare technology more industry crypto market analysis retirement about inve... 10 960 0.026 0.838 0.135 0.9978 Positive 0 0 0 [ai, stock, that, will, make, you, amazingly, rich, year, investorplace, skip, content, dow, nasdaq, sp, primary, menu, premium, service, our, analyst, newsletter, log, my, account, my, service, support, log, today, market, stock, stock, pick, hot, stock, stock, buy, stock, sell, stock, quote, all, stock, pick, stock, type, bluechip, stock, dividend, stock, growth, stock, meme, stock, penny, stock, undervalue, stock, industry, consumer, discretionary, consumer, staple, energy, healthcare, te... ai stock that will make you amazingly rich year investorplace skip content dow nasdaq sp primary menu premium service our analyst newsletter log my account my service support log today market stock stock pick hot stock stock buy stock sell stock quote all stock pick stock type bluechip stock dividend stock growth stock meme stock penny stock undervalue stock industry consumer discretionary consumer staple energy healthcare technology more industry crypto market analysis retirement about inve... 28 [(stock, 0.023904788595814668), (investorplace, 0.01544796477269306), (buy, 0.008307764777238285), (investor, 0.007376686291163532), (bearish, 0.006724162222856815), (bullish, 0.00636508904505768), (analyst, 0.00523340581064324), (trading, 0.005053029803090463), (sp, 0.00495622236670173), (return, 0.004890595467201021)] 2023-01-01
In [ ]:
# Extract and format month and year
positive_sentiments['Month_Year'] = positive_sentiments['date'].dt.strftime('%b %Y')

# Convert 'Month_Year' column to datetime format
positive_sentiments['Month_Year'] = pd.to_datetime(positive_sentiments['Month_Year'])

plt.figure(figsize=(10, 6))

# Count the number of articles per month-year
filtered_df = positive_sentiments[positive_sentiments['Bert_topics'] == 28]
monthly_counts = filtered_df['Month_Year'].value_counts()

# Sort the index (datetime values) in chronological order
monthly_counts = monthly_counts.sort_index()

# Create the bar plot using Seaborn
ax = sns.barplot(x=monthly_counts.index, y=monthly_counts.values, color='green')

plt.xlabel('Month Year')
plt.ylabel('Number of Positive Articles')
plt.title('Monthly Distribution of Positive Articles Over Time')

# Reformat the x-axis labels to 'Jan 2020' format
ax.set_xticklabels(monthly_counts.index.strftime('%b %Y'), rotation=90)

plt.tight_layout()
plt.show()

AI in Gaming¶

In [ ]:
positive_sentiments[positive_sentiments['Bert_topics'] == 33].sample(n = 3)
Out[ ]:
date clean_lem_title clean_lem_text title_length text_length neg neu pos compound sentiment sentiments_SVM Sentiments_LR Sentiments_NB text_tokens text_tokens_string Bert_topics Bert_topics_words Month_Year
74900 2022-11-29 ea introduces ai system detects player behavior phoneworld ea introduces ai system detects player behavior phoneworld menu news device apps game telecom review best of android iphone internet pc how to package call jazz call package telenor call package ufone call package warid call package zong call package sm zong sm package ufone sm package warid sm package jazz sm package telenor sm package internet jazz internet package ufone internet package zong internet package telenor internet package warid internet package pta tax calculator mobile price m... 8 598 0.119 0.679 0.202 0.9875 Positive 0 0 0 [ea, introduces, ai, system, detects, player, behavior, phoneworld, menu, news, device, apps, game, telecom, review, best, of, android, iphone, internet, pc, how, to, package, call, jazz, call, package, telenor, call, package, ufone, call, package, warid, call, package, zong, call, package, sm, zong, sm, package, ufone, sm, package, warid, sm, package, jazz, sm, package, telenor, sm, package, internet, jazz, internet, package, ufone, internet, package, zong, internet, package, telenor, inter... ea introduces ai system detects player behavior phoneworld menu news device apps game telecom review best of android iphone internet pc how to package call jazz call package telenor call package ufone call package warid call package zong call package sm zong sm package ufone sm package warid sm package jazz sm package telenor sm package internet jazz internet package ufone internet package zong internet package telenor internet package warid internet package pta tax calculator mobile price m... 33 [(game, 0.020269459826465597), (npc, 0.00787036392522475), (xbox, 0.006636985307265514), (pc, 0.005660748304813759), (playstation, 0.005424199534356989), (mmo, 0.005214609205578811), (chess, 0.004847539426440268), (player, 0.004839190247571787), (play, 0.004713432097319996), (gamer, 0.004464249723469274)] 2022-11-01
178895 2023-06-09 generative ai help bring tomorrow game npc life engadget generative ai help bring tomorrow game npc life techhandsonview reviewsbuying guidesbest wireless earbudsbest robot vacuumsbest laptopsbest game laptopsbest vpnbest bluetooth trackersbest webcamsview buying guidesgamingbest tomorrowwwdc techhandsonview reviewsbuying guidesbest wireless earbudsbest robot vacuumsbest laptopsbest game laptopsbest vpnbest bluetooth trackersbest webcamsview buying guidesgamingbest tomorrowwwdc ai help bring tomorrow game npc lifesay goodbye dialog tree hello like... 9 1543 0.020 0.778 0.203 0.9994 Positive 0 1 0 [generative, ai, help, bring, tomorrow, game, npc, life, techhandsonview, reviewsbuying, guidesbest, wireless, earbudsbest, robot, vacuumsbest, laptopsbest, game, laptopsbest, vpnbest, bluetooth, trackersbest, webcamsview, buying, guidesgamingbest, tomorrowwwdc, techhandsonview, reviewsbuying, guidesbest, wireless, earbudsbest, robot, vacuumsbest, laptopsbest, game, laptopsbest, vpnbest, bluetooth, trackersbest, webcamsview, buying, guidesgamingbest, tomorrowwwdc, ai, help, bring, tomorrow, ... generative ai help bring tomorrow game npc life techhandsonview reviewsbuying guidesbest wireless earbudsbest robot vacuumsbest laptopsbest game laptopsbest vpnbest bluetooth trackersbest webcamsview buying guidesgamingbest tomorrowwwdc techhandsonview reviewsbuying guidesbest wireless earbudsbest robot vacuumsbest laptopsbest game laptopsbest vpnbest bluetooth trackersbest webcamsview buying guidesgamingbest tomorrowwwdc ai help bring tomorrow game npc lifesay goodbye dialog tree hello like... 33 [(game, 0.020269459826465597), (npc, 0.00787036392522475), (xbox, 0.006636985307265514), (pc, 0.005660748304813759), (playstation, 0.005424199534356989), (mmo, 0.005214609205578811), (chess, 0.004847539426440268), (player, 0.004839190247571787), (play, 0.004713432097319996), (gamer, 0.004464249723469274)] 2023-06-01
126577 2022-05-19 total war warhammer update add new elite unit tweak ai aggressiveness more total war warhammer update add new elite unit tweak ai aggressiveness more feature guide news review xbox pc playstation nintendo entertainment quiz connect twinfinite feature guide news review xbox pc playstation nintendo entertainment quiz total war warhammer update add new elite unit tweak ai aggressiveness more share tweet news total war warhammer update add new elite unit tweak ai aggressiveness more by jake su publish may jake su home news total war warhammer update add new elite unit ... 12 352 0.070 0.805 0.125 0.9774 Positive 1 1 0 [total, war, warhammer, update, add, new, elite, unit, tweak, ai, aggressiveness, more, feature, guide, news, review, xbox, pc, playstation, nintendo, entertainment, quiz, connect, twinfinite, feature, guide, news, review, xbox, pc, playstation, nintendo, entertainment, quiz, total, war, warhammer, update, add, new, elite, unit, tweak, ai, aggressiveness, more, share, tweet, news, total, war, warhammer, update, add, new, elite, unit, tweak, ai, aggressiveness, more, by, jake, su, publish, ma... total war warhammer update add new elite unit tweak ai aggressiveness more feature guide news review xbox pc playstation nintendo entertainment quiz connect twinfinite feature guide news review xbox pc playstation nintendo entertainment quiz total war warhammer update add new elite unit tweak ai aggressiveness more share tweet news total war warhammer update add new elite unit tweak ai aggressiveness more by jake su publish may jake su home news total war warhammer update add new elite unit ... 33 [(game, 0.020269459826465597), (npc, 0.00787036392522475), (xbox, 0.006636985307265514), (pc, 0.005660748304813759), (playstation, 0.005424199534356989), (mmo, 0.005214609205578811), (chess, 0.004847539426440268), (player, 0.004839190247571787), (play, 0.004713432097319996), (gamer, 0.004464249723469274)] 2022-05-01
In [ ]:
# Extract and format month and year
positive_sentiments['Month_Year'] = positive_sentiments['date'].dt.strftime('%b %Y')

# Convert 'Month_Year' column to datetime format
positive_sentiments['Month_Year'] = pd.to_datetime(positive_sentiments['Month_Year'])

plt.figure(figsize=(10, 6))

# Count the number of articles per month-year
filtered_df = positive_sentiments[positive_sentiments['Bert_topics'] == 33]
monthly_counts = filtered_df['Month_Year'].value_counts()

# Sort the index (datetime values) in chronological order
monthly_counts = monthly_counts.sort_index()

# Create the bar plot using Seaborn
ax = sns.barplot(x=monthly_counts.index, y=monthly_counts.values, color='green')

plt.xlabel('Month Year')
plt.ylabel('Number of Positive Articles')
plt.title('Monthly Distribution of Positive Articles Over Time')

# Reformat the x-axis labels to 'Jan 2020' format
ax.set_xticklabels(monthly_counts.index.strftime('%b %Y'), rotation=90)

plt.tight_layout()
plt.show()

AI's Impact on Employment and Human Resources¶

In [ ]:
positive_sentiments[positive_sentiments['Bert_topics'] == 35].sample(n = 3)
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
/opt/conda/lib/python3.7/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   3360             try:
-> 3361                 return self._engine.get_loc(casted_key)
   3362             except KeyError as err:

/opt/conda/lib/python3.7/site-packages/pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

/opt/conda/lib/python3.7/site-packages/pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 'Bert_topics'

The above exception was the direct cause of the following exception:

KeyError                                  Traceback (most recent call last)
/var/tmp/ipykernel_12349/3818417282.py in <module>
----> 1 positive_sentiments[positive_sentiments['Bert_topics'] == 35].sample(n = 3)

/opt/conda/lib/python3.7/site-packages/pandas/core/frame.py in __getitem__(self, key)
   3456             if self.columns.nlevels > 1:
   3457                 return self._getitem_multilevel(key)
-> 3458             indexer = self.columns.get_loc(key)
   3459             if is_integer(indexer):
   3460                 indexer = [indexer]

/opt/conda/lib/python3.7/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   3361                 return self._engine.get_loc(casted_key)
   3362             except KeyError as err:
-> 3363                 raise KeyError(key) from err
   3364 
   3365         if is_scalar(key) and isna(key) and not self.hasnans:

KeyError: 'Bert_topics'
In [ ]:
# Extract and format month and year
positive_sentiments['Month_Year'] = positive_sentiments['date'].dt.strftime('%b %Y')

# Convert 'Month_Year' column to datetime format
positive_sentiments['Month_Year'] = pd.to_datetime(positive_sentiments['Month_Year'])

plt.figure(figsize=(10, 6))

# Count the number of articles per month-year
filtered_df = positive_sentiments[positive_sentiments['Bert_topics'] == 35]
monthly_counts = filtered_df['Month_Year'].value_counts()

# Sort the index (datetime values) in chronological order
monthly_counts = monthly_counts.sort_index()

# Create the bar plot using Seaborn
ax = sns.barplot(x=monthly_counts.index, y=monthly_counts.values, color='green')

plt.xlabel('Month Year')
plt.ylabel('Number of Positive Articles')
plt.title('Monthly Distribution of Positive Articles Over Time')

# Reformat the x-axis labels to 'Jan 2020' format
ax.set_xticklabels(monthly_counts.index.strftime('%b %Y'), rotation=90)

plt.tight_layout()
plt.show()

Negative Sentiment Over Time¶

In [ ]:
negative_sentiments = pd.read_csv('gs://nlp_final_ss/Negtive_BERTopics_Sentiments.csv')
negative_sentiments.shape
Out[ ]:
(10227, 17)
In [ ]:
negative_sentiments.head(2)
Out[ ]:
date clean_lem_title clean_lem_text title_length text_length neg neu pos compound sentiment sentiments_SVM Sentiments_LR Sentiments_NB text_tokens text_tokens_string Bert_topics Bert_topics_words
0 2021-12-20 overjet sink teeth another round capital denta... overjet sink teeth another round capital denta... 9 1642 0.139 0.731 0.130 -0.9905 Negative 0 0 0 ['overjet', 'sink', 'teeth', 'another', 'round... overjet sink teeth another round capital denta... 3 [('stock', 0.04445421827697157), ('trade', 0.0...
1 2023-02-01 versus system stock soar ai deal play off chat... versus system stock soar ai deal play off chat... 10 814 0.144 0.745 0.111 -0.9950 Negative 0 0 0 ['versus', 'system', 'stock', 'soar', 'ai', 'd... versus system stock soar ai deal play off chat... 3 [('stock', 0.04445421827697157), ('trade', 0.0...
In [ ]:
negative_sentiments['Bert_topics'].value_counts()
Out[ ]:
-1     4302
 0      781
 1      752
 2      617
 3      474
 4      424
 5      395
 6      303
 7      251
 8      235
 9      195
 10     132
 11     125
 12     123
 13     112
 14     111
 15     109
 16      88
 17      80
 18      80
 19      79
 20      77
 21      72
 22      71
 23      66
 24      61
 25      60
 26      52
Name: Bert_topics, dtype: int64
In [ ]:
 
In [ ]:
negative_sentiments['date'] = pd.to_datetime(negative_sentiments['date'])

Ai in tchnology¶

In [ ]:
negative_sentiments[negative_sentiments['Bert_topics'] == -1].sample(n = 3)
Out[ ]:
date clean_lem_title clean_lem_text title_length text_length neg neu pos compound sentiment sentiments_SVM Sentiments_LR Sentiments_NB text_tokens text_tokens_string Bert_topics Bert_topics_words Month_Year
3368 2023-05-19 fake tesla ad us ai create false ryan reynolds endorsement kgwcom fake tesla ad us ai create false ryan reynolds endorsement kgwcom skip navigation share facebook share twitter share sm share email navigation news back local kgw investigates near me health the story nation world politics life community entertainment feature late news story voter shake newberg school board follow string controversy afterschool pride event cancel portland elementary school due threat weather back forecast radar 10day hourly map closing delay traffic weather cam late weather ... 11 621 0.108 0.783 0.110 -0.7583 Negative 0 0 0 ['fake', 'tesla', 'ad', 'us', 'ai', 'create', 'false', 'ryan', 'reynolds', 'endorsement', 'kgwcom', 'skip', 'navigation', 'share', 'facebook', 'share', 'twitter', 'share', 'sm', 'share', 'email', 'navigation', 'news', 'back', 'local', 'kgw', 'investigates', 'near', 'me', 'health', 'the', 'story', 'nation', 'world', 'politics', 'life', 'community', 'entertainment', 'feature', 'late', 'news', 'story', 'voter', 'shake', 'newberg', 'school', 'board', 'follow', 'string', 'controversy', 'afterscho... fake tesla ad us ai create false ryan reynolds endorsement kgwcom skip navigation share facebook share twitter share sm share email navigation news back local kgw investigates near me health the story nation world politics life community entertainment feature late news story voter shake newberg school board follow string controversy afterschool pride event cancel portland elementary school due threat weather back forecast radar 10day hourly map closing delay traffic weather cam late weather ... -1 [('ai', 0.017365114695074534), ('the', 0.01403196309735906), ('news', 0.01170961413958012), ('new', 0.01071867775964026), ('use', 0.01002466825383406), ('technology', 0.00972014193210006), ('say', 0.009371157430697986), ('company', 0.009158552706871721), ('data', 0.00904095772736404), ('intelligence', 0.0086232045398164)] 2023-05-01
6840 2021-05-09 iiit delhi offer pg diploma data science ai techstory iiit delhi offer pg diploma data science ai techstory send tip call tech writer advertise game search log join news crypto gadget game car ai video startup business how log in register remember me lose password yes add mail list password email news crypto gadget game car ai video startup business how homefuture techai iiit delhi offer pg diploma data science aisandra there donymay 2021aifuture tech facebook twitter linkedin email whatsapp reddit flipboardartificial intelligence gradually mov... 9 547 0.182 0.696 0.121 -0.9884 Negative 0 0 0 ['iiit', 'delhi', 'offer', 'pg', 'diploma', 'data', 'science', 'ai', 'techstory', 'send', 'tip', 'call', 'tech', 'writer', 'advertise', 'game', 'search', 'log', 'join', 'news', 'crypto', 'gadget', 'game', 'car', 'ai', 'video', 'startup', 'business', 'how', 'log', 'in', 'register', 'remember', 'me', 'lose', 'password', 'yes', 'add', 'mail', 'list', 'password', 'email', 'news', 'crypto', 'gadget', 'game', 'car', 'ai', 'video', 'startup', 'business', 'how', 'homefuture', 'techai', 'iiit', 'delh... iiit delhi offer pg diploma data science ai techstory send tip call tech writer advertise game search log join news crypto gadget game car ai video startup business how log in register remember me lose password yes add mail list password email news crypto gadget game car ai video startup business how homefuture techai iiit delhi offer pg diploma data science aisandra there donymay 2021aifuture tech facebook twitter linkedin email whatsapp reddit flipboardartificial intelligence gradually mov... -1 [('ai', 0.017365114695074534), ('the', 0.01403196309735906), ('news', 0.01170961413958012), ('new', 0.01071867775964026), ('use', 0.01002466825383406), ('technology', 0.00972014193210006), ('say', 0.009371157430697986), ('company', 0.009158552706871721), ('data', 0.00904095772736404), ('intelligence', 0.0086232045398164)] 2021-05-01
5834 2023-01-30 chatgpt soon replace software engineer openai reportedly training ai code scale the financial express chatgpt soon replace software engineer openai reportedly training ai code scale the financial express english english business business insurance insurance the financial express follow facebook twitter linkedin homeelections 2022budget 2023market index nifty sensex cafe invest commodity ipo news invest banking insurance income tax mutual railway aviation travel tourism health photo video audio web story auto web story paperfrom the print international edits column opinion fe politics economy... 14 1014 0.105 0.790 0.105 -0.8875 Negative 0 0 0 ['chatgpt', 'soon', 'replace', 'software', 'engineer', 'openai', 'reportedly', 'training', 'ai', 'code', 'scale', 'the', 'financial', 'express', 'english', 'english', 'business', 'business', 'insurance', 'insurance', 'the', 'financial', 'express', 'follow', 'facebook', 'twitter', 'linkedin', 'homeelections', '2022budget', '2023market', 'index', 'nifty', 'sensex', 'cafe', 'invest', 'commodity', 'ipo', 'news', 'invest', 'banking', 'insurance', 'income', 'tax', 'mutual', 'railway', 'aviation', ... chatgpt soon replace software engineer openai reportedly training ai code scale the financial express english english business business insurance insurance the financial express follow facebook twitter linkedin homeelections 2022budget 2023market index nifty sensex cafe invest commodity ipo news invest banking insurance income tax mutual railway aviation travel tourism health photo video audio web story auto web story paperfrom the print international edits column opinion fe politics economy... -1 [('ai', 0.017365114695074534), ('the', 0.01403196309735906), ('news', 0.01170961413958012), ('new', 0.01071867775964026), ('use', 0.01002466825383406), ('technology', 0.00972014193210006), ('say', 0.009371157430697986), ('company', 0.009158552706871721), ('data', 0.00904095772736404), ('intelligence', 0.0086232045398164)] 2023-01-01
In [ ]:
# Extract and format month and year
negative_sentiments['Month_Year'] = negative_sentiments['date'].dt.strftime('%b %Y')

# Convert 'Month_Year' column to datetime format
negative_sentiments['Month_Year'] = pd.to_datetime(negative_sentiments['Month_Year'])

plt.figure(figsize=(10, 6))

# Count the number of articles per month-year
filtered_df = negative_sentiments[negative_sentiments['Bert_topics'] == -1]
monthly_counts = filtered_df['Month_Year'].value_counts()

# Sort the index (datetime values) in chronological order
monthly_counts = monthly_counts.sort_index()

# Create the bar plot using Seaborn
ax = sns.barplot(x=monthly_counts.index, y=monthly_counts.values, color='red')

plt.xlabel('Month Year')
plt.ylabel('Number of Negative Articles')
plt.title('Monthly Distribution of Positive Articles Over Time')

# Reformat the x-axis labels to 'Jan 2020' format
ax.set_xticklabels(monthly_counts.index.strftime('%b %Y'), rotation=90)

plt.tight_layout()
plt.show()
In [ ]:
 
In [ ]:
negative_sentiments[negative_sentiments['Bert_topics'] == 1].sample(n = 3)
Out[ ]:
date clean_lem_title clean_lem_text title_length text_length neg neu pos compound sentiment sentiments_SVM Sentiments_LR Sentiments_NB text_tokens text_tokens_string Bert_topics Bert_topics_words Month_Year
4510 2023-04-21 michael schumacher ai interview incredibly dangerous show poison bot weaponised warns expert the sun michael schumacher ai interview incredibly dangerous show poison bot weaponised warns expert the sunjump directly contentsign inus edition uk editionscottish sunirish azall footballall newsschuey scandal michael schumacher ai interview incredibly dangerous show poison bot weaponised warns expert aliki krateroutaryn pedlerpublished et apr 2023updated et apr 2023a shameless aigenerated michael schumacher interview show poison bot weaponised expert warn german magazine die aktuelle slam claim f... 14 739 0.099 0.801 0.099 -0.9514 Negative 0 0 0 ['michael', 'schumacher', 'ai', 'interview', 'incredibly', 'dangerous', 'show', 'poison', 'bot', 'weaponised', 'warns', 'expert', 'the', 'sunjump', 'directly', 'contentsign', 'inus', 'edition', 'uk', 'editionscottish', 'sunirish', 'azall', 'footballall', 'newsschuey', 'scandal', 'michael', 'schumacher', 'ai', 'interview', 'incredibly', 'dangerous', 'show', 'poison', 'bot', 'weaponised', 'warns', 'expert', 'aliki', 'krateroutaryn', 'pedlerpublished', 'et', 'apr', '2023updated', 'et', 'apr', '... michael schumacher ai interview incredibly dangerous show poison bot weaponised warns expert the sunjump directly contentsign inus edition uk editionscottish sunirish azall footballall newsschuey scandal michael schumacher ai interview incredibly dangerous show poison bot weaponised warns expert aliki krateroutaryn pedlerpublished et apr 2023updated et apr 2023a shameless aigenerated michael schumacher interview show poison bot weaponised expert warn german magazine die aktuelle slam claim f... 1 [('ai', 0.027785748301818083), ('use', 0.014374109021231108), ('the', 0.014284417492811837), ('technology', 0.012615994666732595), ('say', 0.012044907998800456), ('data', 0.01171675334827808), ('intelligence', 0.010534199611368747), ('new', 0.010047718620800937), ('business', 0.009615154504210398), ('artificial', 0.009446930707573877)] 2023-04-01
3887 2023-07-04 un council hold first meeting potential threat artificial intelligence global peace et cio un council hold first meeting potential threat artificial intelligence global peace et cio we use cooky ensure best experience we use cooky track technology improve browsing experience site show personalize content target ad analyze site traffic understand audience come you also read privacy policy we use cooky ensure best experience website by choose accept continue website consent use cooky term condition analytics performance cooky target advertising cooky india southeast asia login get a... 13 779 0.157 0.692 0.151 -0.9798 Negative 0 0 0 ['un', 'council', 'hold', 'first', 'meeting', 'potential', 'threat', 'artificial', 'intelligence', 'global', 'peace', 'et', 'cio', 'we', 'use', 'cooky', 'ensure', 'best', 'experience', 'we', 'use', 'cooky', 'track', 'technology', 'improve', 'browsing', 'experience', 'site', 'show', 'personalize', 'content', 'target', 'ad', 'analyze', 'site', 'traffic', 'understand', 'audience', 'come', 'you', 'also', 'read', 'privacy', 'policy', 'we', 'use', 'cooky', 'ensure', 'best', 'experience', 'website'... un council hold first meeting potential threat artificial intelligence global peace et cio we use cooky ensure best experience we use cooky track technology improve browsing experience site show personalize content target ad analyze site traffic understand audience come you also read privacy policy we use cooky ensure best experience website by choose accept continue website consent use cooky term condition analytics performance cooky target advertising cooky india southeast asia login get a... 1 [('ai', 0.027785748301818083), ('use', 0.014374109021231108), ('the', 0.014284417492811837), ('technology', 0.012615994666732595), ('say', 0.012044907998800456), ('data', 0.01171675334827808), ('intelligence', 0.010534199611368747), ('new', 0.010047718620800937), ('business', 0.009615154504210398), ('artificial', 0.009446930707573877)] 2023-07-01
3988 2023-06-27 use of ai in nuclear weapon extremely dangerous may lead to catastrophic result un urdupoint use of ai in nuclear weapon extremely dangerous may lead to catastrophic result un urdupoint home news pakistan today pakistanpolitics newscrime newsmiddle eastsaudi arabia newsuae newsbahrain newskuwait newsqatar newsegypt newsjordan newspalestine newslebanon newsiraq newssyria newsyemen newsworldpolitics newscrime newssportspsl newspsl newspsl newspsl newspsl newspsl newscricket newsfootball newshockey newsmiscellaneous newshollywood newsbollywood newslollywood newsfashion newshealtheducat... 15 624 0.128 0.763 0.109 -0.9284 Negative 0 0 0 ['use', 'of', 'ai', 'in', 'nuclear', 'weapon', 'extremely', 'dangerous', 'may', 'lead', 'to', 'catastrophic', 'result', 'un', 'urdupoint', 'home', 'news', 'pakistan', 'today', 'pakistanpolitics', 'newscrime', 'newsmiddle', 'eastsaudi', 'arabia', 'newsuae', 'newsbahrain', 'newskuwait', 'newsqatar', 'newsegypt', 'newsjordan', 'newspalestine', 'newslebanon', 'newsiraq', 'newssyria', 'newsyemen', 'newsworldpolitics', 'newscrime', 'newssportspsl', 'newspsl', 'newspsl', 'newspsl', 'newspsl', 'news... use of ai in nuclear weapon extremely dangerous may lead to catastrophic result un urdupoint home news pakistan today pakistanpolitics newscrime newsmiddle eastsaudi arabia newsuae newsbahrain newskuwait newsqatar newsegypt newsjordan newspalestine newslebanon newsiraq newssyria newsyemen newsworldpolitics newscrime newssportspsl newspsl newspsl newspsl newspsl newspsl newscricket newsfootball newshockey newsmiscellaneous newshollywood newsbollywood newslollywood newsfashion newshealtheducat... 1 [('ai', 0.027785748301818083), ('use', 0.014374109021231108), ('the', 0.014284417492811837), ('technology', 0.012615994666732595), ('say', 0.012044907998800456), ('data', 0.01171675334827808), ('intelligence', 0.010534199611368747), ('new', 0.010047718620800937), ('business', 0.009615154504210398), ('artificial', 0.009446930707573877)] 2023-06-01
In [ ]:
# Extract and format month and year
negative_sentiments['Month_Year'] = negative_sentiments['date'].dt.strftime('%b %Y')

# Convert 'Month_Year' column to datetime format
negative_sentiments['Month_Year'] = pd.to_datetime(negative_sentiments['Month_Year'])

plt.figure(figsize=(10, 6))

# Count the number of articles per month-year
filtered_df = negative_sentiments[negative_sentiments['Bert_topics'] == 1]
monthly_counts = filtered_df['Month_Year'].value_counts()

# Sort the index (datetime values) in chronological order
monthly_counts = monthly_counts.sort_index()

# Create the bar plot using Seaborn
ax = sns.barplot(x=monthly_counts.index, y=monthly_counts.values, color='red')

plt.xlabel('Month Year')
plt.ylabel('Number of Negative Articles')
plt.title('Monthly Distribution of Positive Articles Over Time')

# Reformat the x-axis labels to 'Jan 2020' format
ax.set_xticklabels(monthly_counts.index.strftime('%b %Y'), rotation=90)

plt.tight_layout()
plt.show()